Restructure integration tests to use separate JSON and corpus files

This makes these tests way easier to write and read.
This commit is contained in:
Max Brunsfeld 2017-03-09 11:49:30 -08:00
parent f049d5d94c
commit ac4167fdc9
54 changed files with 1424 additions and 1211 deletions

View file

@ -1,42 +0,0 @@
// External scanner for the `extra_external_tokens` test grammar.
// It recognizes a single external token: a `#`-to-end-of-line comment.
#include <tree_sitter/parser.h>
// Indices into this grammar's `externals` array.
enum {
COMMENT,
};
// The scanner keeps no state, so the payload is always NULL.
void *tree_sitter_extra_external_tokens_external_scanner_create() {
return NULL;
}
// Stateless scanner: nothing to reset.
void tree_sitter_extra_external_tokens_external_scanner_reset(void *payload) {
}
// Stateless scanner: report success with an empty serialization.
bool tree_sitter_extra_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) {
return true;
}
// Stateless scanner: nothing to restore.
void tree_sitter_extra_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
}
// Lex one COMMENT token: skip leading spaces, then consume `#` through
// the next newline. Returns true iff a COMMENT was produced.
bool tree_sitter_extra_external_tokens_external_scanner_scan(
void *payload, TSLexer *lexer, const bool *whitelist) {
// Leading spaces are advanced past with the second argument set to true
// (presumably marking them as skipped whitespace — matches usage below).
while (lexer->lookahead == ' ') {
lexer->advance(lexer, true);
}
if (lexer->lookahead == '#') {
lexer->advance(lexer, false);
// NOTE(review): assumes every comment is terminated by '\n'; a comment
// at end of input (lookahead == 0) would spin here — confirm inputs.
while (lexer->lookahead != '\n') {
lexer->advance(lexer, false);
}
lexer->result_symbol = COMMENT;
return true;
}
return false;
}
// create() allocated nothing, so there is nothing to free.
void tree_sitter_extra_external_tokens_external_scanner_destroy(void *payload) {
}

View file

@ -0,0 +1,32 @@
================================================
anonymous tokens defined with character classes
================================================
1234
---
(first_rule)
=================================================
anonymous tokens defined with LF escape sequence
=================================================
---
(first_rule)
=================================================
anonymous tokens defined with CR escape sequence
=================================================
---
(first_rule)
================================================
anonymous tokens with quotes
================================================
'hello'
---
(first_rule)

View file

@ -0,0 +1,14 @@
{
"name": "anonymous_tokens_with_escaped_chars",
"rules": {
"first_rule": {
"type": "CHOICE",
"members": [
{"type": "STRING", "value": "\n"},
{"type": "STRING", "value": "\r"},
{"type": "STRING", "value": "'hello'"},
{"type": "PATTERN", "value": "\\d+"}
]
}
}
}

View file

@ -0,0 +1,8 @@
===================
chained operations
===================
x+y+z
---
(expression (math_operation
(expression (math_operation (expression (identifier)) (expression (identifier))))
(expression (identifier))))

View file

@ -0,0 +1,31 @@
{
"name": "associativity_left",
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "math_operation"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"math_operation": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
}

View file

@ -0,0 +1,13 @@
Unresolved conflict for symbol sequence:
expression '+' expression • '+' …
Possible interpretations:
1: (math_operation expression '+' expression) • '+' …
2: expression '+' (math_operation expression • '+' expression)
Possible resolutions:
1: Specify a left or right associativity in `math_operation`
2: Add a conflict for these rules: `math_operation`

View file

@ -0,0 +1,27 @@
{
"name": "associativity_missing",
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "math_operation"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"math_operation": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
}

View file

@ -0,0 +1,8 @@
===================
chained operations
===================
x+y+z
---
(expression (math_operation
(expression (identifier))
(expression (math_operation (expression (identifier)) (expression (identifier))))))

View file

@ -0,0 +1,31 @@
{
"name": "associativity_right",
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "math_operation"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"math_operation": {
"type": "PREC_RIGHT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
}

View file

@ -0,0 +1,15 @@
Unresolved conflict for symbol sequence:
expression '+' expression • '*' …
Possible interpretations:
1: (sum expression '+' expression) • '*' …
2: expression '+' (product expression • '*' expression)
3: expression '+' (other_thing expression • '*' '*')
Possible resolutions:
1: Specify a higher precedence in `product` and `other_thing` than in the other rules.
2: Specify a higher precedence in `sum` than in the other rules.
3: Add a conflict for these rules: `sum` `product` `other_thing`

View file

@ -0,0 +1,58 @@
{
"name": "conflicting_precedence",
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "sum"},
{"type": "SYMBOL", "name": "product"},
{"type": "SYMBOL", "name": "other_thing"}
]
},
"sum": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"product": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "*"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"other_thing": {
"type": "PREC_LEFT",
"value": -1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "*"},
{"type": "STRING", "value": "*"}
]
}
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
}

View file

@ -0,0 +1,2 @@
The rule `rule_2` matches the empty string.
Tree-sitter currently does not support syntactic rules that match the empty string.

View file

@ -0,0 +1,15 @@
{
"name": "epsilon_rules",
"rules": {
"rule_1": {"type": "SYMBOL", "name": "rule_2"},
"rule_2": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "rule_1"},
{"type": "BLANK"}
]
}
}
}

View file

@ -0,0 +1,41 @@
=========================================
single-line statements - internal tokens
=========================================
a b
---
(statement (variable) (variable) (line_break))
=========================================
multi-line statements - internal tokens
=========================================
a
b
---
(statement (variable) (variable) (line_break))
=========================================
single-line statements - external tokens
=========================================
'hello' 'world'
---
(statement (string) (string) (line_break))
=========================================
multi-line statements - external tokens
=========================================
'hello'
'world'
---
(statement (string) (string) (line_break))

View file

@ -0,0 +1,36 @@
{
"name": "external_and_internal_tokens",
"externals": [
"string",
"line_break"
],
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"statement": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "_expression"},
{"type": "SYMBOL", "name": "_expression"},
{"type": "SYMBOL", "name": "line_break"}
]
},
"_expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "string"},
{"type": "SYMBOL", "name": "variable"},
{"type": "SYMBOL", "name": "number"}
]
},
"variable": {"type": "PATTERN", "value": "\\a+"},
"number": {"type": "PATTERN", "value": "\\d+"},
"line_break": {"type": "STRING", "value": "\n"}
}
}

View file

@ -0,0 +1 @@
This grammar has an external scanner whose `scan` method needs to be able to check for the validity of an *internal* token. This is done by including the name of that internal token (`line_break`) in the grammar's `externals` field.

View file

@ -1,4 +1,3 @@
#include <stdbool.h>
#include <tree_sitter/parser.h>
enum {
@ -6,21 +5,17 @@ enum {
LINE_BREAK
};
void *tree_sitter_shared_external_tokens_external_scanner_create() {
return NULL;
}
void *tree_sitter_external_and_internal_tokens_external_scanner_create() { return NULL; }
void tree_sitter_shared_external_tokens_external_scanner_reset(void *payload) {
}
void tree_sitter_external_and_internal_tokens_external_scanner_destroy(void *payload) {}
bool tree_sitter_shared_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) {
return true;
}
void tree_sitter_external_and_internal_tokens_external_scanner_reset(void *payload) {}
void tree_sitter_shared_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
}
bool tree_sitter_external_and_internal_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) { return true; }
bool tree_sitter_shared_external_tokens_external_scanner_scan(
void tree_sitter_external_and_internal_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {}
bool tree_sitter_external_and_internal_tokens_external_scanner_scan(
void *payload, TSLexer *lexer, const bool *whitelist) {
// If a line-break is a valid lookahead token, only skip spaces.
@ -58,6 +53,3 @@ bool tree_sitter_shared_external_tokens_external_scanner_scan(
return false;
}
void tree_sitter_shared_external_tokens_external_scanner_destroy(void *payload) {
}

View file

@ -0,0 +1,10 @@
========================
extra external tokens
========================
x = # a comment
y
---
(assignment (variable) (comment) (variable))

View file

@ -0,0 +1,25 @@
{
"name": "external_extra_tokens",
"externals": [
"comment"
],
"extras": [
{"type": "PATTERN", "value": "\\s"},
{"type": "SYMBOL", "name": "comment"}
],
"rules": {
"assignment": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "variable"},
{"type": "STRING", "value": "="},
{"type": "SYMBOL", "name": "variable"}
]
},
"variable": {"type": "PATTERN", "value": "\\a+"}
}
}

View file

@ -0,0 +1,36 @@
// External scanner for the `external_extra_tokens` test grammar. It
// recognizes one external token: a `#`-to-end-of-line comment, which the
// grammar lists among its `extras`.
#include <tree_sitter/parser.h>
// Indices into this grammar's `externals` array.
enum {
COMMENT,
};
// The scanner keeps no state, so all lifecycle hooks are no-ops.
void *tree_sitter_external_extra_tokens_external_scanner_create() { return NULL; }
void tree_sitter_external_extra_tokens_external_scanner_destroy(void *payload) {}
void tree_sitter_external_extra_tokens_external_scanner_reset(void *payload) {}
// No state to save: report success with an empty serialization.
bool tree_sitter_external_extra_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) { return true; }
void tree_sitter_external_extra_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {}
// Lex one COMMENT token: skip leading spaces, then consume `#` through the
// end of the line. Returns true (with result_symbol set) iff a COMMENT was
// produced; returns false to let the internal lexer run otherwise.
//
// Fix: the inner loop previously tested only for '\n', so a comment that
// reached end of input (lookahead == 0) would never terminate. Guard on the
// NUL lookahead as well so a trailing comment ends the token at EOF.
bool tree_sitter_external_extra_tokens_external_scanner_scan(
void *payload, TSLexer *lexer, const bool *whitelist) {
// Advance past leading spaces, marking them as skipped.
while (lexer->lookahead == ' ') {
lexer->advance(lexer, true);
}
if (lexer->lookahead == '#') {
lexer->advance(lexer, false);
// Consume to the end of the line, stopping at end of input too.
while (lexer->lookahead != '\n' && lexer->lookahead != 0) {
lexer->advance(lexer, false);
}
lexer->result_symbol = COMMENT;
return true;
}
return false;
}

View file

@ -0,0 +1,22 @@
========================
simple external tokens
=========================
x + %(sup (external) scanner?)
---
(expression (sum (expression (identifier)) (expression (string))))
==================================
external tokens that require state
==================================
%{sup {} #{x + y} {} scanner?}
---
(expression (string
(expression (sum
(expression (identifier))
(expression (identifier))))))

View file

@ -0,0 +1,57 @@
{
"name": "external_tokens",
"externals": [
"_percent_string",
"_percent_string_start",
"_percent_string_end"
],
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "string"},
{"type": "SYMBOL", "name": "sum"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"sum": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"string": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "_percent_string"},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "_percent_string_start"},
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "_percent_string_end"}
]
}
]
},
"identifier": {
"type": "PATTERN",
"value": "\\a+"
}
}
}

View file

@ -1,4 +1,3 @@
#include <stdbool.h>
#include <tree_sitter/parser.h>
enum {
@ -13,7 +12,7 @@ typedef struct {
uint32_t depth;
} Scanner;
void *tree_sitter_external_scanner_example_external_scanner_create() {
void *tree_sitter_external_tokens_external_scanner_create() {
Scanner *scanner = malloc(sizeof(Scanner));
*scanner = (Scanner){
.open_delimiter = 0,
@ -23,7 +22,17 @@ void *tree_sitter_external_scanner_example_external_scanner_create() {
return scanner;
}
bool tree_sitter_external_scanner_example_external_scanner_scan(
void tree_sitter_external_tokens_external_scanner_destroy(void *payload) {
free(payload);
}
void tree_sitter_external_tokens_external_scanner_reset(void *payload) {}
bool tree_sitter_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) { return true; }
void tree_sitter_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {}
bool tree_sitter_external_tokens_external_scanner_scan(
void *payload, TSLexer *lexer, const bool *whitelist) {
Scanner *scanner = payload;
@ -103,16 +112,3 @@ bool tree_sitter_external_scanner_example_external_scanner_scan(
return false;
}
void tree_sitter_external_scanner_example_external_scanner_reset(void *payload) {
}
bool tree_sitter_external_scanner_example_external_scanner_serialize(void *payload, TSExternalTokenState state) {
return true;
}
void tree_sitter_external_scanner_example_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
}
void tree_sitter_external_scanner_example_external_scanner_destroy(void *payload) {
free(payload);
}

View file

@ -0,0 +1,15 @@
Unresolved conflict for symbol sequence:
identifier • '{' …
Possible interpretations:
1: (expression identifier) • '{' …
2: (function_call identifier • block)
Possible resolutions:
1: Specify a higher precedence in `function_call` than in the other rules.
2: Specify a higher precedence in `expression` than in the other rules.
3: Specify a left or right associativity in `expression`
4: Add a conflict for these rules: `expression` `function_call`

View file

@ -0,0 +1,63 @@
{
"name": "precedence_on_single_child_missing",
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "function_call"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"function_call": {
"type": "PREC_RIGHT",
"value": 0,
"content": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "block"}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "block"}
]
}
]
}
},
"block": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": "{"},
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "}"}
]
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
}

View file

@ -0,0 +1,14 @@
This language has function calls similar to Ruby's, with no parentheses required, and optional blocks.
There is a shift/reduce conflict here:
```
foo bar { baz }
^
```
The possible actions are:
1. `reduce(expression, 1)` - `bar` is an expression being passed to the `foo` function.
2. `shift` - `bar` is a function being called with the block `{ baz }`
The grammars `precedence_on_single_child_negative` and `precedence_on_single_child_positive` show possible resolutions to this conflict.

View file

@ -0,0 +1,12 @@
===========================
function calls with blocks
===========================
foo bar { baz }
---
(expression (function_call
(identifier)
(expression (identifier))
(block (expression (identifier)))))

View file

@ -0,0 +1,63 @@
{
"name": "precedence_on_single_child_negative",
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "function_call"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"function_call": {
"type": "PREC_RIGHT",
"value": -1,
"content": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "block"}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "block"}
]
}
]
}
},
"block": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": "{"},
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "}"}
]
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
}

View file

@ -0,0 +1 @@
This grammar resolves the conflict shown in the `precedence_on_single_child_missing` grammar by giving `function_call` a negative precedence. This causes reducing the `bar` variable to an expression to be preferred over shifting the `{` token as part of `function_call`.

View file

@ -0,0 +1,13 @@
===========================
function calls with blocks
===========================
foo bar { baz }
---
(expression (function_call
(identifier)
(expression (function_call
(identifier)
(block (expression (identifier)))))))

View file

@ -0,0 +1,63 @@
{
"name": "precedence_on_single_child_positive",
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "function_call"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"function_call": {
"type": "PREC_RIGHT",
"value": 1,
"content": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "block"}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "block"}
]
}
]
}
},
"block": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": "{"},
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "}"}
]
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
}

View file

@ -0,0 +1 @@
This grammar resolves the conflict shown in the `precedence_on_single_child_missing` grammar by giving `function_call` a positive precedence. This causes shifting the `{` token as part of `function_call` to be preferred over reducing the `bar` variable to an expression.

View file

@ -0,0 +1,24 @@
==========================================
curly brace blocks with high precedence
==========================================
a b {}
---
(expression (function_call
(identifier)
(expression (function_call (identifier) (block)))))
==========================================
do blocks with low precedence
==========================================
a b do end
---
(expression (function_call
(identifier)
(expression (identifier))
(do_block)))

View file

@ -0,0 +1,135 @@
{
"name": "precedence_on_subsequence",
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"expression": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "function_call"},
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "scope_resolution"}
]
}
},
"function_call": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "PREC",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "block"}
]
}
},
{
"type": "PREC",
"value": -1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "do_block"}
]
}
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{
"type": "PREC",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "block"}
]
}
}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{
"type": "PREC",
"value": -1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "do_block"}
]
}
}
]
}
]
},
"scope_resolution": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "::"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "SEQ",
"members": [
{"type": "STRING", "value": "::"},
{"type": "SYMBOL", "name": "expression"}
]
}
]
}
},
"block": {
"type": "STRING",
"value": "{}"
},
"do_block": {
"type": "STRING",
"value": "do end"
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
}

3
spec/fixtures/test_grammars/readme.md vendored Normal file
View file

@ -0,0 +1,3 @@
These small grammars demonstrate specific features or test for certain specific regressions.
For some of them, compilation is expected to fail with a given error message. For others, the resulting parser is expected to produce certain trees.

View file

@ -0,0 +1,13 @@
==================================
the readme example
==================================
a + b * c
---
(expression (sum
(expression (variable))
(expression (product
(expression (variable))
(expression (variable))))))

View file

@ -0,0 +1,67 @@
{
"name": "readme_grammar",
// Things that can appear anywhere in the language, like comments
// and whitespace, are expressed as 'extras'.
"extras": [
{"type": "PATTERN", "value": "\\s"},
{"type": "SYMBOL", "name": "comment"}
],
"rules": {
// The first rule listed in the grammar becomes the 'start rule'.
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "sum"},
{"type": "SYMBOL", "name": "product"},
{"type": "SYMBOL", "name": "number"},
{"type": "SYMBOL", "name": "variable"},
{
"type": "SEQ",
"members": [
{"type": "STRING", "value": "("},
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": ")"}
]
}
]
},
// Tokens like '+' and '*' are described directly within the
// grammar's rules, as opposed to in a separate lexer description.
"sum": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
// Ambiguities can be resolved at compile time by assigning precedence
// values to rule subtrees.
"product": {
"type": "PREC_LEFT",
"value": 2,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "*"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
// Tokens can be specified using ECMAScript regexps.
"number": {"type": "PATTERN", "value": "\\d+"},
"comment": {"type": "PATTERN", "value": "#.*"},
"variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
}
}

View file

@ -0,0 +1,7 @@
========================
the empty string
=======================
---
(first_rule)

View file

@ -0,0 +1,6 @@
{
"name": "start_rule_is_blank",
"rules": {
"first_rule": {"type": "BLANK"}
}
}

View file

@ -0,0 +1,6 @@
===========================
the single token
==========================
the-value
---
(first_rule)

View file

@ -0,0 +1,6 @@
{
"name": "start_rule_is_token",
"rules": {
"first_rule": {"type": "STRING", "value": "the-value"}
}
}

View file

@ -0,0 +1,61 @@
#include "helpers/file_helpers.h"
#include <sys/stat.h>
#include <errno.h>
#include <fstream>
#include <dirent.h>
using std::string;
using std::ifstream;
using std::istreambuf_iterator;
using std::ofstream;
using std::vector;
// Returns true when `path` names an existing file or directory, i.e. when
// stat() succeeds on it.
bool file_exists(const std::string &path) {
  struct stat file_info;
  int status = stat(path.c_str(), &file_info);
  return status == 0;
}
// Returns the last-modification time of `path` in seconds since the epoch,
// or 0 if the file does not exist or cannot be stat()ed.
// NOTE: `st_mtime` is a time_t; the result is truncated to `int` to match
// the existing interface in file_helpers.h.
int get_modified_time(const std::string &path) {
  struct stat file_stat;
  if (stat(path.c_str(), &file_stat) != 0) {
    // A missing file is an expected case for callers (e.g. an output that
    // has not been generated yet); only report unexpected stat() failures.
    // Fix: removed the stray unary `+` before path.c_str().
    if (errno != ENOENT)
      fprintf(stderr, "Error in stat() for path: %s\n", path.c_str());
    return 0;
  }
  return file_stat.st_mtime;
}
// Reads and returns the entire contents of the file at `path`. A file that
// cannot be opened yields an empty string. The stream is closed by its
// destructor when it goes out of scope.
std::string read_file(const std::string &path) {
  std::ifstream stream(path);
  return std::string(std::istreambuf_iterator<char>(stream),
                     std::istreambuf_iterator<char>());
}
// Replaces the contents of the file at `path` with `content`, creating the
// file if needed. The stream's destructor flushes and closes it.
void write_file(const std::string &path, const std::string &content) {
  std::ofstream stream(path);
  stream << content;
}
// Returns the names of the entries in the directory at `path`, excluding the
// "." and ".." entries. If the directory cannot be opened, prints a test
// error and returns an empty vector.
std::vector<std::string> list_directory(const std::string &path) {
  std::vector<std::string> entries;
  DIR *directory = opendir(path.c_str());
  if (directory == nullptr) {
    printf("\nTest error - no such directory '%s'", path.c_str());
    return entries;
  }
  for (struct dirent *entry = readdir(directory); entry != nullptr;
       entry = readdir(directory)) {
    std::string name(entry->d_name);
    if (name == "." || name == "..") continue;
    entries.push_back(name);
  }
  closedir(directory);
  return entries;
}

View file

@ -0,0 +1,14 @@
// Small filesystem helpers shared by the specs.
#ifndef HELPERS_FILE_HELPERS_H_
#define HELPERS_FILE_HELPERS_H_
#include <string>
#include <vector>
#include <sys/stat.h>
// Returns true if `path` can be stat()ed (the file or directory exists).
bool file_exists(const std::string &path);
// Returns the mtime of `path` in epoch seconds, or 0 if stat() fails.
int get_modified_time(const std::string &path);
// Returns the entire contents of the file at `path`.
std::string read_file(const std::string &path);
// Replaces the contents of the file at `path` with `content`.
void write_file(const std::string &path, const std::string &content);
// Returns the entry names in directory `path`, excluding "." and "..".
std::vector<std::string> list_directory(const std::string &path);
#endif // HELPERS_FILE_HELPERS_H_

View file

@ -1,12 +1,12 @@
#include "spec_helper.h"
#include "helpers/load_language.h"
#include "helpers/file_helpers.h"
#include <unistd.h>
#include <dlfcn.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <map>
#include <string>
#include <sys/stat.h>
#include <fstream>
#include <stdlib.h>
#include "tree_sitter/compiler.h"
@ -54,25 +54,10 @@ static std::string run_command(const char *cmd, const char *args[]) {
}
}
static bool file_exists(const string &path) {
struct stat file_stat;
return stat(path.c_str(), &file_stat) == 0;
}
static int get_modified_time(const string &path) {
struct stat file_stat;
if (stat(path.c_str(), &file_stat) != 0) {
if (errno != ENOENT)
fprintf(stderr, "Error in stat() for path: %s\n", + path.c_str());
return 0;
}
return file_stat.st_mtime;
}
const TSLanguage *load_language(const string &source_filename,
const string &lib_filename,
const string &language_name,
string external_scanner_filename = "") {
static const TSLanguage *load_language(const string &source_filename,
const string &lib_filename,
const string &language_name,
string external_scanner_filename = "") {
string language_function_name = "tree_sitter_" + language_name;
string header_dir = getenv("PWD") + string("/include");
int source_mtime = get_modified_time(source_filename);
@ -132,9 +117,9 @@ const TSLanguage *load_language(const string &source_filename,
return reinterpret_cast<TSLanguage *(*)()>(language_function)();
}
const TSLanguage *load_compile_result(const string &name,
const TSCompileResult &compile_result,
string external_scanner_path) {
const TSLanguage *load_test_language(const string &name,
const TSCompileResult &compile_result,
string external_scanner_path) {
if (compile_result.error_type != TSCompileErrorTypeNone) {
Assert::Failure(string("Compilation failed ") + compile_result.error_message);
return nullptr;
@ -155,7 +140,7 @@ const TSLanguage *load_compile_result(const string &name,
return language;
}
const TSLanguage *get_test_language(const string &language_name) {
const TSLanguage *load_real_language(const string &language_name) {
if (loaded_languages[language_name])
return loaded_languages[language_name];
@ -182,20 +167,14 @@ const TSLanguage *get_test_language(const string &language_name) {
if (parser_mtime < grammar_mtime || parser_mtime < libcompiler_mtime) {
printf("\n" "Regenerating the %s parser...\n", language_name.c_str());
ifstream grammar_file(grammar_filename);
istreambuf_iterator<char> grammar_file_iterator(grammar_file), end_iterator;
string grammar_json(grammar_file_iterator, end_iterator);
grammar_file.close();
string grammar_json = read_file(grammar_filename);
TSCompileResult result = ts_compile_grammar(grammar_json.c_str());
if (result.error_type != TSCompileErrorTypeNone) {
fprintf(stderr, "Failed to compile %s grammar: %s\n", language_name.c_str(), result.error_message);
return nullptr;
}
ofstream parser_file(parser_filename);
parser_file << result.code;
parser_file.close();
write_file(parser_filename, result.code);
}
mkdir("out/tmp", 0777);

View file

@ -5,8 +5,10 @@
#include "tree_sitter/runtime.h"
#include <string>
const TSLanguage *load_compile_result(const std::string &, const TSCompileResult &,
std::string external_scanner_path = "");
const TSLanguage *get_test_language(const std::string &language_name);
const TSLanguage *load_real_language(const std::string &name);
const TSLanguage *load_test_language(const std::string &name,
const TSCompileResult &compile_result,
std::string external_scanner_path = "");
#endif // HELPERS_LOAD_LANGUAGE_H_

View file

@ -1,20 +1,18 @@
#include "helpers/read_test_entries.h"
#include <assert.h>
#include <string>
#include <fstream>
#include <streambuf>
#include <dirent.h>
#include <regex>
#include "helpers/file_helpers.h"
using std::regex;
using std::regex_search;
using std::regex_replace;
using std::smatch;
using std::regex_constants::extended;
using std::smatch;
using std::string;
using std::vector;
using std::ifstream;
using std::istreambuf_iterator;
string fixtures_dir = "spec/fixtures/";
static string trim_output(const string &input) {
string result(input);
@ -27,7 +25,7 @@ static string trim_output(const string &input) {
static vector<TestEntry> parse_test_entries(string content) {
regex header_pattern("===+\n" "([^=]+)\n" "===+\n", extended);
regex separator_pattern("---+\n", extended);
regex separator_pattern("---+\r?\n", extended);
vector<string> descriptions;
vector<string> bodies;
@ -55,51 +53,42 @@ static vector<TestEntry> parse_test_entries(string content) {
body.substr(0, matches.position() - 1),
trim_output(body.substr(matches.position() + matches[0].length()))
});
} else {
puts(("Invalid corpus entry with description: " + descriptions[i]).c_str());
abort();
}
}
return result;
}
static vector<string> list_directory(string dir_name) {
vector<string> result;
DIR *dir = opendir(dir_name.c_str());
if (!dir) {
printf("\nTest error - no such directory '%s'", dir_name.c_str());
return result;
}
struct dirent *dir_entry;
while ((dir_entry = readdir(dir))) {
string name(dir_entry->d_name);
if (name != "." && name != "..")
result.push_back(dir_name + "/" + name);
}
closedir(dir);
return result;
}
static string read_file(string filename) {
ifstream file(filename);
string result((istreambuf_iterator<char>(file)), istreambuf_iterator<char>());
return result;
}
vector<TestEntry> read_corpus_entries(string language_name) {
vector<TestEntry> read_real_language_corpus(string language_name) {
vector<TestEntry> result;
string fixtures_dir = "spec/fixtures/";
string test_directory = fixtures_dir + "grammars/" + language_name + "/grammar_test";
for (string &test_filename : list_directory(test_directory))
for (TestEntry &entry : parse_test_entries(read_file(test_filename)))
for (string &test_filename : list_directory(test_directory)) {
for (TestEntry &entry : parse_test_entries(read_file(test_directory + "/" + test_filename))) {
result.push_back(entry);
}
}
string error_test_filename = fixtures_dir + "/error_corpus/" + language_name + "_errors.txt";
for (TestEntry &entry : parse_test_entries(read_file(error_test_filename)))
for (TestEntry &entry : parse_test_entries(read_file(error_test_filename))) {
result.push_back(entry);
}
return result;
}
vector<TestEntry> read_test_language_corpus(string language_name) {
vector<TestEntry> result;
string test_directory = fixtures_dir + "test_grammars/" + language_name;
for (string &test_filename : list_directory(test_directory)) {
for (TestEntry &entry : parse_test_entries(read_file(test_directory + "/" + test_filename))) {
result.push_back(entry);
}
}
return result;
}

View file

@ -10,6 +10,7 @@ struct TestEntry {
std::string tree_string;
};
std::vector<TestEntry> read_corpus_entries(std::string directory);
std::vector<TestEntry> read_real_language_corpus(std::string name);
std::vector<TestEntry> read_test_language_corpus(std::string name);
#endif

View file

@ -1,847 +0,0 @@
#include "spec_helper.h"
#include "runtime/alloc.h"
#include "helpers/load_language.h"
#include "helpers/stderr_logger.h"
#include "helpers/dedent.h"
#include "compiler/util/string_helpers.h"
#include <map>
// Returns a copy of `input` in which every "{{key}}" placeholder has
// been replaced by the corresponding value from `parameters`.
static string fill_template(string input, map<string, string> parameters) {
  // `input` is a by-value copy, so it can be edited in place and
  // returned directly.
  for (map<string, string>::const_iterator entry = parameters.begin();
       entry != parameters.end(); ++entry) {
    util::str_replace(&input, "{{" + entry->first + "}}", entry->second);
  }
  return input;
}
START_TEST
describe("compile_grammar", []() {
TSDocument *document;
before_each([&]() {
document = ts_document_new();
});
after_each([&]() {
ts_document_free(document);
});
auto assert_root_node = [&](const string &expected_string) {
TSNode root_node = ts_document_root_node(document);
char *node_string = ts_node_string(root_node, document);
AssertThat(node_string, Equals(expected_string));
ts_free(node_string);
};
describe("conflicts", [&]() {
it("can resolve shift/reduce conflicts using associativities", [&]() {
string grammar_template = R"JSON({
"name": "associativity_example",
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "math_operation"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"math_operation": {
"type": "{{math_operation_prec_type}}",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
})JSON";
// Ambiguity, which '+' applies first?
ts_document_set_input_string(document, "x+y+z");
TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
{"math_operation_prec_type", "PREC"}
}).c_str());
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
Unresolved conflict for symbol sequence:
expression '+' expression '+'
Possible interpretations:
1: (math_operation expression '+' expression) '+'
2: expression '+' (math_operation expression '+' expression)
Possible resolutions:
1: Specify a left or right associativity in `math_operation`
2: Add a conflict for these rules: `math_operation`
)MESSAGE")));
result = ts_compile_grammar(fill_template(grammar_template, {
{"math_operation_prec_type", "PREC_LEFT"}
}).c_str());
ts_document_set_language(document, load_compile_result("associativity_example", result));
ts_document_parse(document);
assert_root_node("(expression (math_operation "
"(expression (math_operation (expression (identifier)) (expression (identifier)))) "
"(expression (identifier))))");
result = ts_compile_grammar(fill_template(grammar_template, {
{"math_operation_prec_type", "PREC_RIGHT"}
}).c_str());
ts_document_set_language(document, load_compile_result("associativity_example", result));
ts_document_parse(document);
assert_root_node("(expression (math_operation "
"(expression (identifier)) "
"(expression (math_operation (expression (identifier)) (expression (identifier))))))");
});
it("can resolve shift/reduce conflicts involving single-child rules using precedence", [&]() {
string grammar_template = R"JSON({
"name": "associativity_example",
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "function_call"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"function_call": {
"type": "PREC_RIGHT",
"value": {{function_call_precedence}},
"content": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "block"}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "block"}
]
}
]
}
},
"block": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": "{"},
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "}"}
]
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
})JSON";
// Ambiguity: is the trailing block associated with `bar` or `foo`?
ts_document_set_input_string(document, "foo bar { baz }");
TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
{"function_call_precedence", "0"}
}).c_str());
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
Unresolved conflict for symbol sequence:
identifier '{'
Possible interpretations:
1: (expression identifier) '{'
2: (function_call identifier block)
Possible resolutions:
1: Specify a higher precedence in `function_call` than in the other rules.
2: Specify a higher precedence in `expression` than in the other rules.
3: Specify a left or right associativity in `expression`
4: Add a conflict for these rules: `expression` `function_call`
)MESSAGE")));
// Giving function calls lower precedence than expressions causes `bar`
// to be treated as an expression passed to `foo`, not as a function
// that's being called with a block.
result = ts_compile_grammar(fill_template(grammar_template, {
{"function_call_precedence", "-1"}
}).c_str());
AssertThat(result.error_message, IsNull());
ts_document_set_language(document, load_compile_result("associativity_example", result));
ts_document_parse(document);
assert_root_node("(expression (function_call "
"(identifier) "
"(expression (identifier)) "
"(block (expression (identifier)))))");
// Giving function calls higher precedence than expressions causes `bar`
// to be treated as a function that's being called with a block, not as
// an expression passed to `foo`.
result = ts_compile_grammar(fill_template(grammar_template, {
{"function_call_precedence", "1"}
}).c_str());
AssertThat(result.error_message, IsNull());
ts_document_set_language(document, load_compile_result("associativity_example", result));
ts_document_set_input_string(document, "foo bar { baz }");
ts_document_parse(document);
assert_root_node("(expression (function_call "
"(identifier) "
"(expression (function_call "
"(identifier) "
"(block (expression (identifier)))))))");
});
it("handles precedence applied to specific rule subsequences (regression)", [&]() {
TSCompileResult result = ts_compile_grammar(R"JSON({
"name": "precedence_on_subsequence",
"extras": [
{"type": "STRING", "value": " "}
],
"rules": {
"expression": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "function_call"},
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "scope_resolution"}
]
}
},
"function_call": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "PREC",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "block"}
]
}
},
{
"type": "PREC",
"value": -1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "do_block"}
]
}
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{
"type": "PREC",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "block"}
]
}
}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{
"type": "PREC",
"value": -1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "do_block"}
]
}
}
]
}
]
},
"scope_resolution": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "::"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "SEQ",
"members": [
{"type": "STRING", "value": "::"},
{"type": "SYMBOL", "name": "expression"},
]
}
]
}
},
"block": {
"type": "STRING",
"value": "{}"
},
"do_block": {
"type": "STRING",
"value": "do end"
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
})JSON");
auto language = load_compile_result("precedence_on_subsequence", result);
ts_document_set_language(document, language);
ts_document_set_input_string(document, "a b {}");
ts_document_parse(document);
assert_root_node("(expression (function_call "
"(identifier) "
"(expression (function_call (identifier) (block)))))");
ts_document_set_input_string(document, "a b do end");
ts_document_parse(document);
assert_root_node("(expression (function_call "
"(identifier) "
"(expression (identifier)) "
"(do_block)))");
});
it("does not allow conflicting precedences", [&]() {
string grammar_template = R"JSON({
"name": "conflicting_precedence_example",
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "sum"},
{"type": "SYMBOL", "name": "product"},
{"type": "SYMBOL", "name": "other_thing"}
]
},
"sum": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"product": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "*"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"other_thing": {
"type": "PREC_LEFT",
"value": -1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "*"},
{"type": "STRING", "value": "*"}
]
}
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
})JSON";
TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
}).c_str());
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
Unresolved conflict for symbol sequence:
expression '+' expression '*'
Possible interpretations:
1: (sum expression '+' expression) '*'
2: expression '+' (product expression '*' expression)
3: expression '+' (other_thing expression '*' '*')
Possible resolutions:
1: Specify a higher precedence in `product` and `other_thing` than in the other rules.
2: Specify a higher precedence in `sum` than in the other rules.
3: Add a conflict for these rules: `sum` `product` `other_thing`
)MESSAGE")));
});
});
describe("when the grammar contains rules that match the empty string", [&]() {
it("reports an error", [&]() {
TSCompileResult result = ts_compile_grammar(R"JSON(
{
"name": "empty_rules",
"rules": {
"rule_1": {"type": "SYMBOL", "name": "rule_2"},
"rule_2": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "rule_1"},
{"type": "BLANK"}
]
}
}
}
)JSON");
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
The rule `rule_2` matches the empty string.
Tree-sitter currently does not support syntactic rules that match the empty string.
)MESSAGE")));
});
});
describe("external scanners", [&]() {
it("can tokenize using arbitrary user-defined scanner functions", [&]() {
string grammar = R"JSON({
"name": "external_scanner_example",
"externals": [
"_percent_string",
"_percent_string_start",
"_percent_string_end"
],
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "string"},
{"type": "SYMBOL", "name": "sum"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"sum": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"string": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "_percent_string"},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "_percent_string_start"},
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "_percent_string_end"}
]
},
]
},
"identifier": {
"type": "PATTERN",
"value": "\\a+"
}
}
})JSON";
TSCompileResult result = ts_compile_grammar(grammar.c_str());
AssertThat(result.error_message, IsNull());
ts_document_set_language(document, load_compile_result(
"external_scanner_example",
result,
"spec/fixtures/external_scanners/percent_strings.c"
));
ts_document_set_input_string(document, "x + %(sup (external) scanner?)");
ts_document_parse(document);
assert_root_node("(expression (sum (expression (identifier)) (expression (string))))");
ts_document_set_input_string(document, "%{sup {} #{x + y} {} scanner?}");
ts_document_parse(document);
assert_root_node("(expression (string (expression (sum (expression (identifier)) (expression (identifier))))))");
});
it("allows external scanners to refer to tokens that are defined internally", [&]() {
string grammar = R"JSON({
"name": "shared_external_tokens",
"externals": [
"string",
"line_break"
],
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"statement": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "_expression"},
{"type": "SYMBOL", "name": "_expression"},
{"type": "SYMBOL", "name": "line_break"}
]
},
"_expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "string"},
{"type": "SYMBOL", "name": "variable"},
{"type": "SYMBOL", "name": "number"}
]
},
"variable": {"type": "PATTERN", "value": "\\a+"},
"number": {"type": "PATTERN", "value": "\\d+"},
"line_break": {"type": "STRING", "value": "\n"}
}
})JSON";
TSCompileResult result = ts_compile_grammar(grammar.c_str());
AssertThat(result.error_message, IsNull());
ts_document_set_language(document, load_compile_result(
"shared_external_tokens",
result,
"spec/fixtures/external_scanners/shared_external_tokens.c"
));
ts_document_set_input_string(document, "a b\n");
ts_document_parse(document);
assert_root_node("(statement (variable) (variable) (line_break))");
ts_document_set_input_string(document, "a \nb\n");
ts_document_parse(document);
assert_root_node("(statement (variable) (variable) (line_break))");
ts_document_set_input_string(document, "'hello' 'world'\n");
ts_document_parse(document);
assert_root_node("(statement (string) (string) (line_break))");
ts_document_set_input_string(document, "'hello' \n'world'\n");
ts_document_parse(document);
assert_root_node("(statement (string) (string) (line_break))");
});
it("allows external tokens to be used as extras", [&]() {
string grammar = R"JSON({
"name": "extra_external_tokens",
"externals": [
"comment"
],
"extras": [
{"type": "PATTERN", "value": "\\s"},
{"type": "SYMBOL", "name": "comment"}
],
"rules": {
"assignment": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "variable"},
{"type": "STRING", "value": "="},
{"type": "SYMBOL", "name": "variable"}
]
},
"variable": {"type": "PATTERN", "value": "\\a+"}
}
})JSON";
TSCompileResult result = ts_compile_grammar(grammar.c_str());
AssertThat(result.error_message, IsNull());
ts_document_set_language(document, load_compile_result(
"extra_external_tokens",
result,
"spec/fixtures/external_scanners/extra_external_tokens.c"
));
ts_document_set_input_string(document, "x = # a comment\n y");
ts_document_parse(document);
assert_root_node("(assignment (variable) (comment) (variable))");
});
});
describe("when the grammar's start symbol is a token", [&]() {
it("parses the token", [&]() {
TSCompileResult result = ts_compile_grammar(R"JSON(
{
"name": "one_token_language",
"rules": {
"first_rule": {"type": "STRING", "value": "the-value"}
}
}
)JSON");
ts_document_set_language(document, load_compile_result("one_token_language", result));
ts_document_set_input_string(document, "the-value");
ts_document_parse(document);
assert_root_node("(first_rule)");
});
});
describe("when the grammar's start symbol is blank", [&]() {
it("parses the empty string", [&]() {
TSCompileResult result = ts_compile_grammar(R"JSON(
{
"name": "blank_language",
"rules": {
"first_rule": {"type": "BLANK"}
}
}
)JSON");
ts_document_set_language(document, load_compile_result("blank_language", result));
ts_document_set_input_string(document, "");
ts_document_parse(document);
assert_root_node("(first_rule)");
});
});
describe("when the grammar contains anonymous tokens with escaped characters", [&]() {
it("escapes the escaped characters properly in the generated parser", [&]() {
TSCompileResult result = ts_compile_grammar(R"JSON(
{
"name": "escaped_char_language",
"rules": {
"first_rule": {
"type": "CHOICE",
"members": [
{"type": "STRING", "value": "\n"},
{"type": "STRING", "value": "\r"},
{"type": "STRING", "value": "'hello'"},
{"type": "PATTERN", "value": "\\d+"}
]
}
}
}
)JSON");
ts_document_set_language(document, load_compile_result("escaped_char_language", result));
ts_document_set_input_string(document, "1234");
ts_document_parse(document);
assert_root_node("(first_rule)");
ts_document_set_input_string(document, "\n");
ts_document_parse(document);
assert_root_node("(first_rule)");
ts_document_set_input_string(document, "'hello'");
ts_document_parse(document);
assert_root_node("(first_rule)");
});
});
describe("the grammar in the README", [&]() {
it("parses the input in the README", [&]() {
TSCompileResult result = ts_compile_grammar(R"JSON(
{
"name": "arithmetic",
// Things that can appear anywhere in the language, like comments
// and whitespace, are expressed as 'extras'.
"extras": [
{"type": "PATTERN", "value": "\\s"},
{"type": "SYMBOL", "name": "comment"}
],
"rules": {
// The first rule listed in the grammar becomes the 'start rule'.
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "sum"},
{"type": "SYMBOL", "name": "product"},
{"type": "SYMBOL", "name": "number"},
{"type": "SYMBOL", "name": "variable"},
{
"type": "SEQ",
"members": [
{"type": "STRING", "value": "("},
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": ")"}
]
}
]
},
// Tokens like '+' and '*' are described directly within the
// grammar's rules, as opposed to in a seperate lexer description.
"sum": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
// Ambiguities can be resolved at compile time by assigning precedence
// values to rule subtrees.
"product": {
"type": "PREC_LEFT",
"value": 2,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "*"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
// Tokens can be specified using ECMAScript regexps.
"number": {"type": "PATTERN", "value": "\\d+"},
"comment": {"type": "PATTERN", "value": "#.*"},
"variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
}
}
)JSON");
const TSLanguage *language = load_compile_result("arithmetic", result);
ts_document_set_language(document, language);
ts_document_set_input_string(document, "a + b * c");
ts_document_parse(document);
assert_root_node(
"(expression (sum "
"(expression (variable)) "
"(expression (product "
"(expression (variable)) "
"(expression (variable))))))");
});
});
});
END_TEST

View file

@ -1,185 +0,0 @@
#include "spec_helper.h"
#include "runtime/alloc.h"
#include "helpers/load_language.h"
#include "helpers/read_test_entries.h"
#include "helpers/spy_input.h"
#include "helpers/stderr_logger.h"
#include "helpers/point_helpers.h"
#include "helpers/encoding_helpers.h"
#include "helpers/record_alloc.h"
#include "helpers/random_helpers.h"
#include "helpers/scope_sequence.h"
#include <set>
// Asserts that the document's current syntax tree, rendered through
// ts_node_string, is exactly equal to the expected `tree_string`.
static void assert_correct_tree_shape(const TSDocument *document, string tree_string) {
TSNode root_node = ts_document_root_node(document);
const char *node_string = ts_node_string(root_node, document);
// Copy into a std::string so the heap buffer returned by
// ts_node_string can be released before the assertion runs.
string result(node_string);
ts_free((void *)node_string);
AssertThat(result, Equals(tree_string));
}
// Recursively verifies structural invariants of a syntax node:
// - its start byte/point never exceed its end byte/point,
// - children appear in order and never overlap,
// - the node spans at least as far as its last child,
// - if any child has changes, the parent reports changes too.
static void assert_consistent_sizes(TSNode node) {
size_t child_count = ts_node_child_count(node);
size_t start_byte = ts_node_start_byte(node);
size_t end_byte = ts_node_end_byte(node);
TSPoint start_point = ts_node_start_point(node);
TSPoint end_point = ts_node_end_point(node);
bool some_child_has_changes = false;
AssertThat(start_byte, !IsGreaterThan(end_byte));
AssertThat(start_point, !IsGreaterThan(end_point));
// Track where the previous child ended so each child can be checked
// against its predecessor rather than only against the parent.
size_t last_child_end_byte = start_byte;
TSPoint last_child_end_point = start_point;
for (size_t i = 0; i < child_count; i++) {
TSNode child = ts_node_child(node, i);
size_t child_start_byte = ts_node_start_byte(child);
TSPoint child_start_point = ts_node_start_point(child);
AssertThat(child_start_byte, !IsLessThan(last_child_end_byte));
AssertThat(child_start_point, !IsLessThan(last_child_end_point));
// Recurse: every descendant must satisfy the same invariants.
assert_consistent_sizes(child);
if (ts_node_has_changes(child))
some_child_has_changes = true;
last_child_end_byte = ts_node_end_byte(child);
last_child_end_point = ts_node_end_point(child);
}
if (child_count > 0) {
AssertThat(end_byte, !IsLessThan(last_child_end_byte));
AssertThat(end_point, !IsLessThan(last_child_end_point));
}
// Change flags must propagate upward from any edited descendant.
if (some_child_has_changes) {
AssertThat(ts_node_has_changes(node), IsTrue());
}
}
// Asserts that the root node of the document's tree spans the whole
// input `content`, then checks size invariants over the entire tree.
static void assert_correct_tree_size(TSDocument *document, string content) {
TSNode root_node = ts_document_root_node(document);
size_t expected_size = content.size();
// In the JSON grammar, the start rule (`_value`) is hidden, so the node
// returned from `ts_document_root_node` (e.g. an `object` node), does not
// actually point to the root of the tree. In this weird case, trailing
// whitespace is not included in the root node's size.
//
// TODO: Fix this inconsistency. Maybe disallow the start rule being hidden?
if (ts_document_language(document) == get_test_language("json") &&
string(ts_node_type(root_node, document)) != "ERROR")
// Trim trailing newlines/spaces: expected size ends at the last
// non-whitespace character of the input.
expected_size = content.find_last_not_of("\n ") + 1;
AssertThat(ts_node_end_byte(root_node), Equals(expected_size));
assert_consistent_sizes(root_node);
}
START_TEST
describe("The Corpus", []() {
vector<string> test_languages({
"javascript",
"json",
"c",
"cpp",
"python",
});
for (auto &language_name : test_languages) {
describe(("the " + language_name + " language").c_str(), [&]() {
TSDocument *document;
before_each([&]() {
record_alloc::start();
document = ts_document_new();
ts_document_set_language(document, get_test_language(language_name));
// ts_document_set_logger(document, stderr_logger_new(true));
// ts_document_print_debugging_graphs(document, true);
});
after_each([&]() {
ts_document_free(document);
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
for (auto &entry : read_corpus_entries(language_name)) {
SpyInput *input;
auto it_handles_edit_sequence = [&](string name, std::function<void()> edit_sequence){
it(("parses " + entry.description + ": " + name).c_str(), [&]() {
input = new SpyInput(entry.input, 3);
ts_document_set_input(document, input->input());
edit_sequence();
assert_correct_tree_shape(document, entry.tree_string);
assert_correct_tree_size(document, input->content);
delete input;
});
};
it_handles_edit_sequence("initial parse", [&]() {
ts_document_parse(document);
});
std::set<std::pair<size_t, size_t>> deletions;
std::set<std::pair<size_t, string>> insertions;
for (size_t i = 0; i < 60; i++) {
size_t edit_position = random() % utf8_char_count(entry.input);
size_t deletion_size = random() % (utf8_char_count(entry.input) - edit_position);
string inserted_text = random_words(random() % 4 + 1);
if (insertions.insert({edit_position, inserted_text}).second) {
string description = "\"" + inserted_text + "\" at " + to_string(edit_position);
it_handles_edit_sequence("repairing an insertion of " + description, [&]() {
ts_document_edit(document, input->replace(edit_position, 0, inserted_text));
ts_document_parse(document);
assert_correct_tree_size(document, input->content);
ts_document_edit(document, input->undo());
assert_correct_tree_size(document, input->content);
TSRange *ranges;
uint32_t range_count;
ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
verify_changed_ranges(old_scope_sequence, new_scope_sequence,
input->content, ranges, range_count);
ts_free(ranges);
});
}
if (deletions.insert({edit_position, deletion_size}).second) {
string desription = to_string(edit_position) + "-" + to_string(edit_position + deletion_size);
it_handles_edit_sequence("repairing a deletion of " + desription, [&]() {
ts_document_edit(document, input->replace(edit_position, deletion_size, ""));
ts_document_parse(document);
assert_correct_tree_size(document, input->content);
ts_document_edit(document, input->undo());
assert_correct_tree_size(document, input->content);
TSRange *ranges;
uint32_t range_count;
ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
verify_changed_ranges(old_scope_sequence, new_scope_sequence,
input->content, ranges, range_count);
ts_free(ranges);
});
}
}
}
});
}
});
END_TEST

View file

@ -0,0 +1,181 @@
#include "spec_helper.h"
#include "runtime/alloc.h"
#include "helpers/load_language.h"
#include "helpers/read_test_entries.h"
#include "helpers/spy_input.h"
#include "helpers/stderr_logger.h"
#include "helpers/point_helpers.h"
#include "helpers/encoding_helpers.h"
#include "helpers/record_alloc.h"
#include "helpers/random_helpers.h"
#include "helpers/scope_sequence.h"
#include <set>
// Recursively verifies structural invariants of a syntax node:
// - its start byte/point never exceed its end byte/point,
// - children appear in order and never overlap,
// - the node spans at least as far as its last child,
// - if any child has changes, the parent reports changes too.
static void assert_consistent_sizes(TSNode node) {
size_t child_count = ts_node_child_count(node);
size_t start_byte = ts_node_start_byte(node);
size_t end_byte = ts_node_end_byte(node);
TSPoint start_point = ts_node_start_point(node);
TSPoint end_point = ts_node_end_point(node);
bool some_child_has_changes = false;
AssertThat(start_byte, !IsGreaterThan(end_byte));
AssertThat(start_point, !IsGreaterThan(end_point));
// Track where the previous child ended so each child is checked
// against its predecessor, not only against the parent.
size_t last_child_end_byte = start_byte;
TSPoint last_child_end_point = start_point;
for (size_t i = 0; i < child_count; i++) {
TSNode child = ts_node_child(node, i);
size_t child_start_byte = ts_node_start_byte(child);
TSPoint child_start_point = ts_node_start_point(child);
AssertThat(child_start_byte, !IsLessThan(last_child_end_byte));
AssertThat(child_start_point, !IsLessThan(last_child_end_point));
// Recurse: every descendant must satisfy the same invariants.
assert_consistent_sizes(child);
if (ts_node_has_changes(child))
some_child_has_changes = true;
last_child_end_byte = ts_node_end_byte(child);
last_child_end_point = ts_node_end_point(child);
}
if (child_count > 0) {
AssertThat(end_byte, !IsLessThan(last_child_end_byte));
AssertThat(end_point, !IsLessThan(last_child_end_point));
}
// Change flags must propagate upward from any edited descendant.
if (some_child_has_changes) {
AssertThat(ts_node_has_changes(node), IsTrue());
}
}
// Asserts that the root node of the document's tree spans the whole
// input `content`, then checks size invariants over the entire tree.
// (This copy loads the JSON language via load_real_language, matching
// the helper naming used by the restructured corpus specs.)
static void assert_correct_tree_size(TSDocument *document, string content) {
TSNode root_node = ts_document_root_node(document);
size_t expected_size = content.size();
// In the JSON grammar, the start rule (`_value`) is hidden, so the node
// returned from `ts_document_root_node` (e.g. an `object` node), does not
// actually point to the root of the tree. In this weird case, trailing
// whitespace is not included in the root node's size.
//
// TODO: Fix this inconsistency. Maybe disallow the start rule being hidden?
if (ts_document_language(document) == load_real_language("json") &&
string(ts_node_type(root_node, document)) != "ERROR")
// Trim trailing newlines/spaces: expected size ends at the last
// non-whitespace character of the input.
expected_size = content.find_last_not_of("\n ") + 1;
AssertThat(ts_node_end_byte(root_node), Equals(expected_size));
assert_consistent_sizes(root_node);
}
START_TEST
vector<string> test_languages({
"javascript",
"json",
"c",
"cpp",
"python",
});
for (auto &language_name : test_languages) {
describe(("the " + language_name + " language").c_str(), [&]() {
TSDocument *document;
before_each([&]() {
record_alloc::start();
document = ts_document_new();
ts_document_set_language(document, load_real_language(language_name));
// ts_document_set_logger(document, stderr_logger_new(true));
// ts_document_print_debugging_graphs(document, true);
});
after_each([&]() {
ts_document_free(document);
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
for (auto &entry : read_real_language_corpus(language_name)) {
SpyInput *input;
auto it_handles_edit_sequence = [&](string name, std::function<void()> edit_sequence){
it(("parses " + entry.description + ": " + name).c_str(), [&]() {
input = new SpyInput(entry.input, 3);
ts_document_set_input(document, input->input());
edit_sequence();
TSNode root_node = ts_document_root_node(document);
const char *node_string = ts_node_string(root_node, document);
string result(node_string);
ts_free((void *)node_string);
AssertThat(result, Equals(entry.tree_string));
assert_correct_tree_size(document, input->content);
delete input;
});
};
it_handles_edit_sequence("initial parse", [&]() {
ts_document_parse(document);
});
std::set<std::pair<size_t, size_t>> deletions;
std::set<std::pair<size_t, string>> insertions;
for (size_t i = 0; i < 60; i++) {
size_t edit_position = random() % utf8_char_count(entry.input);
size_t deletion_size = random() % (utf8_char_count(entry.input) - edit_position);
string inserted_text = random_words(random() % 4 + 1);
if (insertions.insert({edit_position, inserted_text}).second) {
string description = "\"" + inserted_text + "\" at " + to_string(edit_position);
it_handles_edit_sequence("repairing an insertion of " + description, [&]() {
ts_document_edit(document, input->replace(edit_position, 0, inserted_text));
ts_document_parse(document);
assert_correct_tree_size(document, input->content);
ts_document_edit(document, input->undo());
assert_correct_tree_size(document, input->content);
TSRange *ranges;
uint32_t range_count;
ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
verify_changed_ranges(old_scope_sequence, new_scope_sequence,
input->content, ranges, range_count);
ts_free(ranges);
});
}
if (deletions.insert({edit_position, deletion_size}).second) {
string desription = to_string(edit_position) + "-" + to_string(edit_position + deletion_size);
it_handles_edit_sequence("repairing a deletion of " + desription, [&]() {
ts_document_edit(document, input->replace(edit_position, deletion_size, ""));
ts_document_parse(document);
assert_correct_tree_size(document, input->content);
ts_document_edit(document, input->undo());
assert_correct_tree_size(document, input->content);
TSRange *ranges;
uint32_t range_count;
ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
verify_changed_ranges(old_scope_sequence, new_scope_sequence,
input->content, ranges, range_count);
ts_free(ranges);
});
}
}
}
});
}
END_TEST

View file

@ -0,0 +1,78 @@
#include "spec_helper.h"
#include "helpers/read_test_entries.h"
#include "helpers/load_language.h"
#include "helpers/stderr_logger.h"
#include "helpers/file_helpers.h"
#include "runtime/alloc.h"
// Integration test over every grammar fixture in spec/fixtures/test_grammars.
// Each fixture directory contains a grammar.json, an optional scanner.c
// (external scanner), an optional expected_error.txt (for grammars that are
// expected to fail compilation), and a corpus.txt of parse examples
// (read via read_test_language_corpus).
START_TEST
string grammars_dir_path = "spec/fixtures/test_grammars";
vector<string> test_languages = list_directory(grammars_dir_path);
for (auto &language_name : test_languages) {
// Skip the documentation file that sits alongside the fixture directories.
if (language_name == "readme.md") continue;
describe(("test language: " + language_name).c_str(), [&]() {
string directory_path = grammars_dir_path + "/" + language_name;
string grammar_path = directory_path + "/grammar.json";
string external_scanner_path = directory_path + "/scanner.c";
string expected_error_path = directory_path + "/expected_error.txt";
string corpus_path = directory_path + "/corpus.txt";
// An empty path signals "no external scanner" to load_test_language below.
if (!file_exists(external_scanner_path)) {
external_scanner_path = "";
}
// Compile the grammar eagerly, at spec-registration time, so both the
// error-message branch and the parsing branch can inspect the result.
string grammar_json = read_file(grammar_path);
TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str());
if (file_exists(expected_error_path)) {
// Fixture is a negative test: compilation must fail with exactly the
// message stored in expected_error.txt.
it("fails with the correct error message", [&]() {
string expected_error = read_file(expected_error_path);
AssertThat((void *)compile_result.error_message, !IsNull());
AssertThat(compile_result.error_message, Equals(expected_error));
});
// No corpus tests for a grammar that is expected not to compile.
return;
} else {
TSDocument *document = nullptr;
const TSLanguage *language = nullptr;
before_each([&]() {
// Load (and cache) the compiled language once; later `it` blocks in
// this describe reuse it.
if (!language) {
language = load_test_language(
language_name,
compile_result,
external_scanner_path
);
}
// A fresh document per example keeps the parses independent.
document = ts_document_new();
ts_document_set_language(document, language);
// Debugging aids, left disabled by default:
// ts_document_set_logger(document, stderr_logger_new(true));
// ts_document_print_debugging_graphs(document, true);
});
after_each([&]() {
if (document) ts_document_free(document);
});
// One `it` per corpus entry: parse the input and compare the
// S-expression form of the tree against the expected tree string.
for (auto &entry : read_test_language_corpus(language_name)) {
it(("parses " + entry.description).c_str(), [&]() {
ts_document_set_input_string_with_length(document, entry.input.c_str(), entry.input.size());
ts_document_parse(document);
TSNode root_node = ts_document_root_node(document);
// ts_node_string allocates; copy into a std::string and free the
// C buffer before asserting.
const char *node_string = ts_node_string(root_node, document);
string result(node_string);
ts_free((void *)node_string);
AssertThat(result, Equals(entry.tree_string));
});
}
}
});
}
END_TEST

View file

@ -43,7 +43,7 @@ describe("Document", [&]() {
before_each([&]() {
spy_input = new SpyInput("{\"key\": [null, 2]}", 3);
ts_document_set_language(document, get_test_language("json"));
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, "{\"key\": [1, 2]}");
ts_document_parse(document);
@ -152,7 +152,7 @@ describe("Document", [&]() {
});
it("uses the given language for future parses", [&]() {
ts_document_set_language(document, get_test_language("json"));
ts_document_set_language(document, load_real_language("json"));
ts_document_parse(document);
root = ts_document_root_node(document);
@ -162,10 +162,10 @@ describe("Document", [&]() {
});
it("clears out any previous tree", [&]() {
ts_document_set_language(document, get_test_language("json"));
ts_document_set_language(document, load_real_language("json"));
ts_document_parse(document);
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
AssertThat(ts_document_root_node(document).data, Equals<void *>(nullptr));
ts_document_parse(document);
@ -177,7 +177,7 @@ describe("Document", [&]() {
});
it("does not allow setting a language with a different version number", [&]() {
TSLanguage language = *get_test_language("json");
TSLanguage language = *load_real_language("json");
AssertThat(ts_language_version(&language), Equals<uint32_t>(TREE_SITTER_LANGUAGE_VERSION));
language.version++;
@ -193,7 +193,7 @@ describe("Document", [&]() {
before_each([&]() {
logger = new SpyLogger();
ts_document_set_language(document, get_test_language("json"));
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, "[1, 2]");
});
@ -235,7 +235,7 @@ describe("Document", [&]() {
SpyInput *input;
before_each([&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
input = new SpyInput("{a: null};", 3);
ts_document_set_input(document, input->input());
ts_document_parse(document);

View file

@ -40,7 +40,7 @@ describe("Node", []() {
record_alloc::start();
document = ts_document_new();
ts_document_set_language(document, get_test_language("json"));
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, input_string.c_str());
ts_document_parse(document);

View file

@ -83,7 +83,7 @@ describe("Parser", [&]() {
describe("handling errors", [&]() {
describe("when there is an invalid substring right before a valid token", [&]() {
it("computes the error node's size and position correctly", [&]() {
ts_document_set_language(document, get_test_language("json"));
ts_document_set_language(document, load_real_language("json"));
set_text(" [123, @@@@@, true]");
assert_root_node(
@ -108,7 +108,7 @@ describe("Parser", [&]() {
describe("when there is an unexpected string in the middle of a token", [&]() {
it("computes the error node's size and position correctly", [&]() {
ts_document_set_language(document, get_test_language("json"));
ts_document_set_language(document, load_real_language("json"));
set_text(" [123, faaaaalse, true]");
assert_root_node(
@ -134,7 +134,7 @@ describe("Parser", [&]() {
describe("when there is one unexpected token between two valid tokens", [&]() {
it("computes the error node's size and position correctly", [&]() {
ts_document_set_language(document, get_test_language("json"));
ts_document_set_language(document, load_real_language("json"));
set_text(" [123, true false, true]");
assert_root_node(
@ -153,7 +153,7 @@ describe("Parser", [&]() {
describe("when there is an unexpected string at the end of a token", [&]() {
it("computes the error's size and position correctly", [&]() {
ts_document_set_language(document, get_test_language("json"));
ts_document_set_language(document, load_real_language("json"));
set_text(" [123, \"hi\n, true]");
assert_root_node(
@ -163,7 +163,7 @@ describe("Parser", [&]() {
describe("when there is an unterminated error", [&]() {
it("maintains a consistent tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("a; /* b");
assert_root_node(
"(ERROR (program (expression_statement (identifier))) (UNEXPECTED EOF))");
@ -172,7 +172,7 @@ describe("Parser", [&]() {
describe("when there are extra tokens at the end of the viable prefix", [&]() {
it("does not include them in the error node", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text(
"var x;\n"
"\n"
@ -192,7 +192,7 @@ describe("Parser", [&]() {
describe("handling extra tokens", [&]() {
describe("when the token appears as part of a grammar rule", [&]() {
it("incorporates it into the tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("fn()\n");
assert_root_node(
@ -202,7 +202,7 @@ describe("Parser", [&]() {
describe("when the token appears somewhere else", [&]() {
it("incorporates it into the tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text(
"fn()\n"
" .otherFn();");
@ -218,7 +218,7 @@ describe("Parser", [&]() {
describe("when several extra tokens appear in a row", [&]() {
it("incorporates them into the tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text(
"fn()\n\n"
"// This is a comment"
@ -239,7 +239,7 @@ describe("Parser", [&]() {
describe("editing", [&]() {
describe("creating new tokens near the end of the input", [&]() {
it("updates the parse tree and re-reads only the changed portion of the text", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("x * (100 + abc);");
assert_root_node(
@ -262,7 +262,7 @@ describe("Parser", [&]() {
it("updates the parse tree and re-reads only the changed portion of the input", [&]() {
chunk_size = 2;
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("123 + 456 * (10 + x);");
assert_root_node(
@ -285,7 +285,7 @@ describe("Parser", [&]() {
describe("introducing an error", [&]() {
it("gives the error the right size", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("var x = y;");
assert_root_node(
@ -308,7 +308,7 @@ describe("Parser", [&]() {
describe("into the middle of an existing token", [&]() {
it("updates the parse tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("abc * 123;");
assert_root_node(
@ -327,7 +327,7 @@ describe("Parser", [&]() {
describe("at the end of an existing token", [&]() {
it("updates the parse tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("abc * 123;");
assert_root_node(
@ -346,7 +346,7 @@ describe("Parser", [&]() {
describe("inserting text into a node containing a extra token", [&]() {
it("updates the parse tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("123 *\n"
"// a-comment\n"
"abc;");
@ -373,7 +373,7 @@ describe("Parser", [&]() {
describe("when a critical token is removed", [&]() {
it("updates the parse tree, creating an error", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("123 * 456; 789 * 123;");
assert_root_node(
@ -392,7 +392,7 @@ describe("Parser", [&]() {
describe("with external tokens", [&]() {
it("maintains the external scanner's state during incremental parsing", [&]() {
ts_document_set_language(document, get_test_language("python"));
ts_document_set_language(document, load_real_language("python"));
string text = dedent(R"PYTHON(
if a:
print b
@ -420,7 +420,7 @@ describe("Parser", [&]() {
});
it("does not try to re-use nodes that are within the edited region", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("{ x: (b.c) };");
assert_root_node(
@ -435,7 +435,7 @@ describe("Parser", [&]() {
});
it("updates the document's parse count", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
AssertThat(ts_document_parse_count(document), Equals<size_t>(0));
set_text("{ x: (b.c) };");
@ -449,7 +449,7 @@ describe("Parser", [&]() {
describe("lexing", [&]() {
describe("handling tokens containing wildcard patterns (e.g. comments)", [&]() {
it("terminates them at the end of the document", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("x; // this is a comment");
assert_root_node(
@ -464,7 +464,7 @@ describe("Parser", [&]() {
it("recognizes UTF8 characters as single characters", [&]() {
// 'ΩΩΩ — ΔΔ';
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';");
assert_root_node(