Rename spec -> test
'Test' is a lot more straightforward of a name.
This commit is contained in:
parent
7d8daf573e
commit
6dc0ff359d
109 changed files with 44 additions and 44 deletions
130
test/fixtures/error_corpus/c_errors.txt
vendored
Normal file
130
test/fixtures/error_corpus/c_errors.txt
vendored
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
========================================
|
||||
Errors inside ifdefs
|
||||
========================================
|
||||
|
||||
#ifdef something
|
||||
int x // no semicolon
|
||||
#endif
|
||||
|
||||
int a;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int b;
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
int c;
|
||||
|
||||
---
|
||||
|
||||
(translation_unit
|
||||
(preproc_ifdef (identifier)
|
||||
(ERROR (identifier) (identifier))
|
||||
(comment))
|
||||
|
||||
(declaration (identifier) (identifier))
|
||||
|
||||
(preproc_ifdef (identifier)
|
||||
(ERROR (storage_class_specifier) (string_literal)))
|
||||
|
||||
(declaration (identifier) (identifier))
|
||||
|
||||
(preproc_ifdef (identifier)
|
||||
(ERROR))
|
||||
|
||||
(declaration (identifier) (identifier)))
|
||||
|
||||
========================================
|
||||
Errors inside blocks
|
||||
========================================
|
||||
|
||||
int main() {
|
||||
int x;
|
||||
int %$#@
|
||||
}
|
||||
|
||||
---
|
||||
|
||||
(translation_unit
|
||||
(function_definition
|
||||
(identifier)
|
||||
(function_declarator (identifier))
|
||||
(compound_statement
|
||||
(declaration (identifier) (identifier))
|
||||
(ERROR (identifier) (UNEXPECTED '$')))))
|
||||
|
||||
========================================
|
||||
Errors inside expressions
|
||||
========================================
|
||||
|
||||
int main() {
|
||||
int x = (123 123);
|
||||
}
|
||||
|
||||
---
|
||||
|
||||
(translation_unit
|
||||
(function_definition
|
||||
(identifier)
|
||||
(function_declarator (identifier))
|
||||
(compound_statement
|
||||
(declaration (identifier) (init_declarator
|
||||
(identifier)
|
||||
(ERROR (number_literal))
|
||||
(number_literal))))))
|
||||
|
||||
========================================
|
||||
Errors in declarations
|
||||
========================================
|
||||
|
||||
float x WTF;
|
||||
int y = 5;
|
||||
|
||||
---
|
||||
|
||||
(translation_unit
|
||||
(declaration (identifier) (ERROR (identifier)) (identifier))
|
||||
(declaration (identifier) (init_declarator (identifier) (number_literal))))
|
||||
|
||||
==========================================
|
||||
Errors at the beginnings of blocks
|
||||
==========================================
|
||||
|
||||
int a() {
|
||||
struct x = 1;
|
||||
struct y = 2;
|
||||
}
|
||||
|
||||
int b() {
|
||||
w x y z = 3;
|
||||
w x y z = 4;
|
||||
}
|
||||
|
||||
---
|
||||
|
||||
(translation_unit
|
||||
(function_definition
|
||||
(identifier) (function_declarator (identifier))
|
||||
(compound_statement
|
||||
(ERROR (struct_specifier (identifier)))
|
||||
(expression_statement (number_literal))
|
||||
(ERROR (struct_specifier (identifier)))
|
||||
(expression_statement (number_literal))))
|
||||
|
||||
(function_definition
|
||||
(identifier) (function_declarator (identifier))
|
||||
(compound_statement
|
||||
(declaration
|
||||
(identifier)
|
||||
(init_declarator
|
||||
(ERROR (identifier) (identifier))
|
||||
(identifier) (number_literal)))
|
||||
(declaration
|
||||
(ERROR (identifier) (identifier))
|
||||
(identifier)
|
||||
(init_declarator (identifier) (number_literal))))))
|
||||
157
test/fixtures/error_corpus/javascript_errors.txt
vendored
Normal file
157
test/fixtures/error_corpus/javascript_errors.txt
vendored
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
===================================================
|
||||
one invalid token right after the viable prefix
|
||||
===================================================
|
||||
|
||||
if (a b) {
|
||||
c d;
|
||||
}
|
||||
e f;
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(if_statement
|
||||
(ERROR (identifier))
|
||||
(identifier)
|
||||
(statement_block
|
||||
(ERROR (identifier))
|
||||
(expression_statement (identifier))))
|
||||
(ERROR (identifier))
|
||||
(expression_statement (identifier)))
|
||||
|
||||
=======================================================
|
||||
multiple invalid tokens right after the viable prefix
|
||||
=======================================================
|
||||
|
||||
if (a b c) {
|
||||
d e f g;
|
||||
}
|
||||
h i j k;
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(if_statement
|
||||
(ERROR (identifier) (identifier))
|
||||
(identifier)
|
||||
(statement_block
|
||||
(ERROR (identifier) (identifier) (identifier))
|
||||
(expression_statement (identifier))))
|
||||
(expression_statement
|
||||
(ERROR (identifier) (identifier) (identifier))
|
||||
(identifier)))
|
||||
|
||||
===================================================
|
||||
one invalid subtree right after the viable prefix
|
||||
===================================================
|
||||
|
||||
if ({a: 'b'} {c: 'd'}) {
|
||||
x = function(a) { b; } function(c) { d; }
|
||||
}
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(if_statement
|
||||
(object (pair (identifier) (string)))
|
||||
(ERROR (object (pair (identifier) (string))))
|
||||
(statement_block
|
||||
(expression_statement (assignment
|
||||
(identifier)
|
||||
(ERROR (function
|
||||
(formal_parameters (identifier))
|
||||
(statement_block (expression_statement (identifier)))))
|
||||
(function
|
||||
(formal_parameters (identifier))
|
||||
(statement_block (expression_statement (identifier)))))))))
|
||||
|
||||
===================================================
|
||||
one invalid token at the end of the file
|
||||
===================================================
|
||||
|
||||
// skip the equals sign
|
||||
a.b =
|
||||
---
|
||||
|
||||
(program
|
||||
(comment)
|
||||
(trailing_expression_statement
|
||||
(member_access (identifier) (identifier)))
|
||||
(ERROR))
|
||||
|
||||
=================================================================
|
||||
An invalid token at the end of a construct with extra line breaks
|
||||
=================================================================
|
||||
|
||||
a(
|
||||
b,
|
||||
c,,
|
||||
);
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(expression_statement
|
||||
(function_call (identifier) (arguments
|
||||
(identifier)
|
||||
(identifier)
|
||||
(ERROR)))))
|
||||
|
||||
===================================================
|
||||
Multi-line chained expressions in var declarations
|
||||
===================================================
|
||||
|
||||
const one = two
|
||||
.three(four)
|
||||
.five()
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(var_declaration (var_assignment
|
||||
(identifier)
|
||||
(function_call
|
||||
(member_access
|
||||
(function_call
|
||||
(member_access (identifier) (identifier))
|
||||
(arguments (identifier)))
|
||||
(identifier))
|
||||
(arguments)))))
|
||||
|
||||
===================================================
|
||||
Errors after a sequence of function declarations
|
||||
===================================================
|
||||
|
||||
/*
|
||||
* The JS grammar has an ambiguity such that these functions
|
||||
* can be parsed either as function declarations or as
|
||||
* function expressions. This ambiguity causes a lot of
|
||||
* splitting and merging in the parse stack. When iterating
|
||||
* the parse stack during an error repair, there would then
|
||||
* be a very large number (> 2^16) of paths through the parse
|
||||
* stack.
|
||||
*/
|
||||
function a() {}
|
||||
function b() {}
|
||||
function c() {}
|
||||
function e() {}
|
||||
function f() {}
|
||||
function g() {}
|
||||
function h() {}
|
||||
function i() {}
|
||||
|
||||
var x = !!!
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(comment)
|
||||
(expression_statement (function (identifier) (formal_parameters) (statement_block)))
|
||||
(expression_statement (function (identifier) (formal_parameters) (statement_block)))
|
||||
(expression_statement (function (identifier) (formal_parameters) (statement_block)))
|
||||
(expression_statement (function (identifier) (formal_parameters) (statement_block)))
|
||||
(expression_statement (function (identifier) (formal_parameters) (statement_block)))
|
||||
(expression_statement (function (identifier) (formal_parameters) (statement_block)))
|
||||
(expression_statement (function (identifier) (formal_parameters) (statement_block)))
|
||||
(expression_statement (function (identifier) (formal_parameters) (statement_block)))
|
||||
(trailing_var_declaration (identifier)) (ERROR))
|
||||
56
test/fixtures/error_corpus/json_errors.txt
vendored
Normal file
56
test/fixtures/error_corpus/json_errors.txt
vendored
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
==========================================
|
||||
top-level errors
|
||||
==========================================
|
||||
|
||||
[}
|
||||
|
||||
---
|
||||
|
||||
(ERROR)
|
||||
|
||||
==========================================
|
||||
unexpected tokens
|
||||
==========================================
|
||||
|
||||
barf
|
||||
|
||||
---
|
||||
|
||||
(ERROR (UNEXPECTED 'b'))
|
||||
|
||||
==========================================
|
||||
errors inside arrays
|
||||
==========================================
|
||||
|
||||
[1, , 2]
|
||||
|
||||
---
|
||||
(array
|
||||
(number)
|
||||
(ERROR)
|
||||
(number))
|
||||
|
||||
==========================================
|
||||
errors inside objects
|
||||
==========================================
|
||||
|
||||
{ "key1": 1, oops }
|
||||
|
||||
---
|
||||
|
||||
(object (pair (string) (number)) (ERROR (UNEXPECTED 'o')))
|
||||
|
||||
==========================================
|
||||
errors inside nested objects
|
||||
==========================================
|
||||
|
||||
{ "key1": { "key2": 1, 2 }, [, "key3": 3 }
|
||||
|
||||
---
|
||||
|
||||
(object
|
||||
(pair (string) (object
|
||||
(pair (string) (number))
|
||||
(ERROR (number))))
|
||||
(ERROR)
|
||||
(pair (string) (number)))
|
||||
29
test/fixtures/error_corpus/python_errors.txt
vendored
Normal file
29
test/fixtures/error_corpus/python_errors.txt
vendored
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
==========================================
|
||||
errors in if statements
|
||||
==========================================
|
||||
|
||||
if a is:
|
||||
print b
|
||||
print c
|
||||
|
||||
---
|
||||
|
||||
(module
|
||||
(if_statement (identifier) (ERROR)
|
||||
(print_statement (identifier))
|
||||
(print_statement (identifier))))
|
||||
|
||||
==========================================
|
||||
errors in function definitions
|
||||
==========================================
|
||||
|
||||
def a()::
|
||||
b
|
||||
c
|
||||
|
||||
---
|
||||
|
||||
(module
|
||||
(function_definition (identifier) (parameters) (ERROR)
|
||||
(expression_statement (identifier))
|
||||
(expression_statement (identifier))))
|
||||
0
test/fixtures/grammars/.gitkeep
vendored
Normal file
0
test/fixtures/grammars/.gitkeep
vendored
Normal file
32
test/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/corpus.txt
vendored
Normal file
32
test/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
================================================
|
||||
anonymous tokens defined with character classes
|
||||
================================================
|
||||
1234
|
||||
---
|
||||
|
||||
(first_rule)
|
||||
|
||||
=================================================
|
||||
anonymous tokens defined with LF escape sequence
|
||||
=================================================
|
||||
|
||||
|
||||
---
|
||||
|
||||
(first_rule)
|
||||
|
||||
=================================================
|
||||
anonymous tokens defined with CR escape sequence
|
||||
=================================================
|
||||
|
||||
---
|
||||
|
||||
(first_rule)
|
||||
|
||||
================================================
|
||||
anonymous tokens with quotes
|
||||
================================================
|
||||
'hello'
|
||||
---
|
||||
|
||||
(first_rule)
|
||||
14
test/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/grammar.json
vendored
Normal file
14
test/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
{
|
||||
"name": "anonymous_tokens_with_escaped_chars",
|
||||
"rules": {
|
||||
"first_rule": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "\n"},
|
||||
{"type": "STRING", "value": "\r"},
|
||||
{"type": "STRING", "value": "'hello'"},
|
||||
{"type": "PATTERN", "value": "\\d+"}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
1
test/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/readme.md
vendored
Normal file
1
test/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/readme.md
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
Every token in a grammar is given a name in the generated parser. Anonymous tokens (tokens specified directly in the body of some larger rule) are named according their content. So when tokens contains characters that aren't valid in a C string literal, we need to escape those characters. This grammar tests that this escaping works. The test is basically that the generated parser compiles succesfully.
|
||||
8
test/fixtures/test_grammars/associativity_left/corpus.txt
vendored
Normal file
8
test/fixtures/test_grammars/associativity_left/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
===================
|
||||
chained operations
|
||||
===================
|
||||
x+y+z
|
||||
---
|
||||
(expression (math_operation
|
||||
(expression (math_operation (expression (identifier)) (expression (identifier))))
|
||||
(expression (identifier))))
|
||||
31
test/fixtures/test_grammars/associativity_left/grammar.json
vendored
Normal file
31
test/fixtures/test_grammars/associativity_left/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
{
|
||||
"name": "associativity_left",
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "math_operation"},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
},
|
||||
|
||||
"math_operation": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "+"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
}
|
||||
13
test/fixtures/test_grammars/associativity_missing/expected_error.txt
vendored
Normal file
13
test/fixtures/test_grammars/associativity_missing/expected_error.txt
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
Unresolved conflict for symbol sequence:
|
||||
|
||||
expression '+' expression • '+' …
|
||||
|
||||
Possible interpretations:
|
||||
|
||||
1: (math_operation expression '+' expression) • '+' …
|
||||
2: expression '+' (math_operation expression • '+' expression)
|
||||
|
||||
Possible resolutions:
|
||||
|
||||
1: Specify a left or right associativity in `math_operation`
|
||||
2: Add a conflict for these rules: `math_operation`
|
||||
27
test/fixtures/test_grammars/associativity_missing/grammar.json
vendored
Normal file
27
test/fixtures/test_grammars/associativity_missing/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
{
|
||||
"name": "associativity_missing",
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "math_operation"},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
},
|
||||
|
||||
"math_operation": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "+"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
}
|
||||
8
test/fixtures/test_grammars/associativity_right/corpus.txt
vendored
Normal file
8
test/fixtures/test_grammars/associativity_right/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
===================
|
||||
chained operations
|
||||
===================
|
||||
x+y+z
|
||||
---
|
||||
(expression (math_operation
|
||||
(expression (identifier))
|
||||
(expression (math_operation (expression (identifier)) (expression (identifier))))))
|
||||
31
test/fixtures/test_grammars/associativity_right/grammar.json
vendored
Normal file
31
test/fixtures/test_grammars/associativity_right/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
{
|
||||
"name": "associativity_right",
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "math_operation"},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
},
|
||||
|
||||
"math_operation": {
|
||||
"type": "PREC_RIGHT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "+"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
}
|
||||
15
test/fixtures/test_grammars/conflicting_precedence/expected_error.txt
vendored
Normal file
15
test/fixtures/test_grammars/conflicting_precedence/expected_error.txt
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
Unresolved conflict for symbol sequence:
|
||||
|
||||
expression '+' expression • '*' …
|
||||
|
||||
Possible interpretations:
|
||||
|
||||
1: (sum expression '+' expression) • '*' …
|
||||
2: expression '+' (product expression • '*' expression)
|
||||
3: expression '+' (other_thing expression • '*' '*')
|
||||
|
||||
Possible resolutions:
|
||||
|
||||
1: Specify a higher precedence in `product` and `other_thing` than in the other rules.
|
||||
2: Specify a higher precedence in `sum` than in the other rules.
|
||||
3: Add a conflict for these rules: `sum` `product` `other_thing`
|
||||
58
test/fixtures/test_grammars/conflicting_precedence/grammar.json
vendored
Normal file
58
test/fixtures/test_grammars/conflicting_precedence/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
{
|
||||
"name": "conflicting_precedence",
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "sum"},
|
||||
{"type": "SYMBOL", "name": "product"},
|
||||
{"type": "SYMBOL", "name": "other_thing"}
|
||||
]
|
||||
},
|
||||
|
||||
"sum": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "+"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"product": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "*"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"other_thing": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": -1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "*"},
|
||||
{"type": "STRING", "value": "*"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
}
|
||||
2
test/fixtures/test_grammars/epsilon_rules/expected_error.txt
vendored
Normal file
2
test/fixtures/test_grammars/epsilon_rules/expected_error.txt
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
The rule `rule_2` matches the empty string.
|
||||
Tree-sitter currently does not support syntactic rules that match the empty string.
|
||||
15
test/fixtures/test_grammars/epsilon_rules/grammar.json
vendored
Normal file
15
test/fixtures/test_grammars/epsilon_rules/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
"name": "epsilon_rules",
|
||||
|
||||
"rules": {
|
||||
"rule_1": {"type": "SYMBOL", "name": "rule_2"},
|
||||
|
||||
"rule_2": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "rule_1"},
|
||||
{"type": "BLANK"}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
41
test/fixtures/test_grammars/external_and_internal_tokens/corpus.txt
vendored
Normal file
41
test/fixtures/test_grammars/external_and_internal_tokens/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
=========================================
|
||||
single-line statements - internal tokens
|
||||
=========================================
|
||||
|
||||
a b
|
||||
|
||||
---
|
||||
|
||||
(statement (variable) (variable) (line_break))
|
||||
|
||||
=========================================
|
||||
multi-line statements - internal tokens
|
||||
=========================================
|
||||
|
||||
a
|
||||
b
|
||||
|
||||
---
|
||||
|
||||
(statement (variable) (variable) (line_break))
|
||||
|
||||
=========================================
|
||||
single-line statements - external tokens
|
||||
=========================================
|
||||
|
||||
'hello' 'world'
|
||||
|
||||
---
|
||||
|
||||
(statement (string) (string) (line_break))
|
||||
|
||||
=========================================
|
||||
multi-line statements - external tokens
|
||||
=========================================
|
||||
|
||||
'hello'
|
||||
'world'
|
||||
|
||||
---
|
||||
|
||||
(statement (string) (string) (line_break))
|
||||
36
test/fixtures/test_grammars/external_and_internal_tokens/grammar.json
vendored
Normal file
36
test/fixtures/test_grammars/external_and_internal_tokens/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
{
|
||||
"name": "external_and_internal_tokens",
|
||||
|
||||
"externals": [
|
||||
"string",
|
||||
"line_break"
|
||||
],
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"statement": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "_expression"},
|
||||
{"type": "SYMBOL", "name": "_expression"},
|
||||
{"type": "SYMBOL", "name": "line_break"}
|
||||
]
|
||||
},
|
||||
|
||||
"_expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "string"},
|
||||
{"type": "SYMBOL", "name": "variable"},
|
||||
{"type": "SYMBOL", "name": "number"}
|
||||
]
|
||||
},
|
||||
|
||||
"variable": {"type": "PATTERN", "value": "\\a+"},
|
||||
"number": {"type": "PATTERN", "value": "\\d+"},
|
||||
"line_break": {"type": "STRING", "value": "\n"}
|
||||
}
|
||||
}
|
||||
1
test/fixtures/test_grammars/external_and_internal_tokens/readme.md
vendored
Normal file
1
test/fixtures/test_grammars/external_and_internal_tokens/readme.md
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
This grammar has an external scanner whose `scan` method needs to be able to check for the validity of an *internal* token. This is done by including the names of that internal token (`_line_break`) in the grammar's `externals` field.
|
||||
55
test/fixtures/test_grammars/external_and_internal_tokens/scanner.c
vendored
Normal file
55
test/fixtures/test_grammars/external_and_internal_tokens/scanner.c
vendored
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
#include <tree_sitter/parser.h>
|
||||
|
||||
enum {
|
||||
STRING,
|
||||
LINE_BREAK
|
||||
};
|
||||
|
||||
void *tree_sitter_external_and_internal_tokens_external_scanner_create() { return NULL; }
|
||||
|
||||
void tree_sitter_external_and_internal_tokens_external_scanner_destroy(void *payload) {}
|
||||
|
||||
void tree_sitter_external_and_internal_tokens_external_scanner_reset(void *payload) {}
|
||||
|
||||
bool tree_sitter_external_and_internal_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) { return true; }
|
||||
|
||||
void tree_sitter_external_and_internal_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {}
|
||||
|
||||
bool tree_sitter_external_and_internal_tokens_external_scanner_scan(
|
||||
void *payload, TSLexer *lexer, const bool *whitelist) {
|
||||
|
||||
// If a line-break is a valid lookahead token, only skip spaces.
|
||||
if (whitelist[LINE_BREAK]) {
|
||||
while (lexer->lookahead == ' ') {
|
||||
lexer->advance(lexer, true);
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '\n') {
|
||||
lexer->advance(lexer, false);
|
||||
lexer->result_symbol = LINE_BREAK;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// If a line-break is not a valid lookahead token, skip line breaks as well
|
||||
// as spaces.
|
||||
if (whitelist[STRING]) {
|
||||
while (lexer->lookahead == ' ' || lexer->lookahead == '\n') {
|
||||
lexer->advance(lexer, true);
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '\'') {
|
||||
lexer->advance(lexer, false);
|
||||
|
||||
while (lexer->lookahead != '\'') {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
lexer->advance(lexer, false);
|
||||
lexer->result_symbol = STRING;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
10
test/fixtures/test_grammars/external_extra_tokens/corpus.txt
vendored
Normal file
10
test/fixtures/test_grammars/external_extra_tokens/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
========================
|
||||
extra external tokens
|
||||
========================
|
||||
|
||||
x = # a comment
|
||||
y
|
||||
|
||||
---
|
||||
|
||||
(assignment (variable) (comment) (variable))
|
||||
25
test/fixtures/test_grammars/external_extra_tokens/grammar.json
vendored
Normal file
25
test/fixtures/test_grammars/external_extra_tokens/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
{
|
||||
"name": "external_extra_tokens",
|
||||
|
||||
"externals": [
|
||||
"comment"
|
||||
],
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"},
|
||||
{"type": "SYMBOL", "name": "comment"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"assignment": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "variable"},
|
||||
{"type": "STRING", "value": "="},
|
||||
{"type": "SYMBOL", "name": "variable"}
|
||||
]
|
||||
},
|
||||
|
||||
"variable": {"type": "PATTERN", "value": "\\a+"}
|
||||
}
|
||||
}
|
||||
36
test/fixtures/test_grammars/external_extra_tokens/scanner.c
vendored
Normal file
36
test/fixtures/test_grammars/external_extra_tokens/scanner.c
vendored
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
#include <tree_sitter/parser.h>
|
||||
|
||||
enum {
|
||||
COMMENT,
|
||||
};
|
||||
|
||||
void *tree_sitter_external_extra_tokens_external_scanner_create() { return NULL; }
|
||||
|
||||
void tree_sitter_external_extra_tokens_external_scanner_destroy(void *payload) {}
|
||||
|
||||
void tree_sitter_external_extra_tokens_external_scanner_reset(void *payload) {}
|
||||
|
||||
bool tree_sitter_external_extra_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) { return true; }
|
||||
|
||||
void tree_sitter_external_extra_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {}
|
||||
|
||||
bool tree_sitter_external_extra_tokens_external_scanner_scan(
|
||||
void *payload, TSLexer *lexer, const bool *whitelist) {
|
||||
|
||||
while (lexer->lookahead == ' ') {
|
||||
lexer->advance(lexer, true);
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '#') {
|
||||
lexer->advance(lexer, false);
|
||||
while (lexer->lookahead != '\n') {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
lexer->result_symbol = COMMENT;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
22
test/fixtures/test_grammars/external_tokens/corpus.txt
vendored
Normal file
22
test/fixtures/test_grammars/external_tokens/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
========================
|
||||
simple external tokens
|
||||
=========================
|
||||
|
||||
x + %(sup (external) scanner?)
|
||||
|
||||
---
|
||||
|
||||
(expression (sum (expression (identifier)) (expression (string))))
|
||||
|
||||
==================================
|
||||
external tokens that require state
|
||||
==================================
|
||||
|
||||
%{sup {} #{x + y} {} scanner?}
|
||||
|
||||
---
|
||||
|
||||
(expression (string
|
||||
(expression (sum
|
||||
(expression (identifier))
|
||||
(expression (identifier))))))
|
||||
57
test/fixtures/test_grammars/external_tokens/grammar.json
vendored
Normal file
57
test/fixtures/test_grammars/external_tokens/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
{
|
||||
"name": "external_tokens",
|
||||
|
||||
"externals": [
|
||||
"_percent_string",
|
||||
"_percent_string_start",
|
||||
"_percent_string_end"
|
||||
],
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "string"},
|
||||
{"type": "SYMBOL", "name": "sum"},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
},
|
||||
|
||||
"sum": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "+"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"string": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "_percent_string"},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "_percent_string_start"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "SYMBOL", "name": "_percent_string_end"}
|
||||
]
|
||||
},
|
||||
]
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "\\a+"
|
||||
}
|
||||
}
|
||||
}
|
||||
114
test/fixtures/test_grammars/external_tokens/scanner.c
vendored
Normal file
114
test/fixtures/test_grammars/external_tokens/scanner.c
vendored
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
#include <tree_sitter/parser.h>
|
||||
|
||||
enum {
|
||||
percent_string,
|
||||
percent_string_start,
|
||||
percent_string_end
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
int32_t open_delimiter;
|
||||
int32_t close_delimiter;
|
||||
uint32_t depth;
|
||||
} Scanner;
|
||||
|
||||
void *tree_sitter_external_tokens_external_scanner_create() {
|
||||
Scanner *scanner = malloc(sizeof(Scanner));
|
||||
*scanner = (Scanner){
|
||||
.open_delimiter = 0,
|
||||
.close_delimiter = 0,
|
||||
.depth = 0
|
||||
};
|
||||
return scanner;
|
||||
}
|
||||
|
||||
void tree_sitter_external_tokens_external_scanner_destroy(void *payload) {
|
||||
free(payload);
|
||||
}
|
||||
|
||||
void tree_sitter_external_tokens_external_scanner_reset(void *payload) {}
|
||||
|
||||
bool tree_sitter_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) { return true; }
|
||||
|
||||
void tree_sitter_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {}
|
||||
|
||||
// Recognizes percent-delimited strings (e.g. `%(abc)`, `%[a[b]c]`) with
// `#{ ... }` interpolation. `whitelist` is indexed by the token enum and
// marks which external tokens are valid at the current parse state.
// Returns true and sets lexer->result_symbol when a token is matched.
bool tree_sitter_external_tokens_external_scanner_scan(
  void *payload, TSLexer *lexer, const bool *whitelist) {
  Scanner *scanner = payload;

  if (whitelist[percent_string]) {
    // Consume leading whitespace; the `true` flag presumably marks these
    // characters as skipped rather than part of the token — TODO confirm
    // against the TSLexer contract.
    for (;;) {
      int32_t c = lexer->lookahead;
      if (c != ' ' && c != '\t' && c != '\n') break;
      lexer->advance(lexer, true);
    }

    if (lexer->lookahead != '%') return false;
    lexer->advance(lexer, false);

    // The character after '%' selects the delimiter pair.
    int32_t opener = lexer->lookahead;
    if (opener == '(') {
      scanner->close_delimiter = ')';
    } else if (opener == '[') {
      scanner->close_delimiter = ']';
    } else if (opener == '{') {
      scanner->close_delimiter = '}';
    } else {
      return false;
    }
    scanner->open_delimiter = opener;
    scanner->depth = 1;

    lexer->advance(lexer, false);

    // Scan until the delimiters balance or an interpolation starts.
    // NOTE(review): if the input ends before the string is closed this
    // loop presumably never terminates — preserved as in the original.
    for (;;) {
      if (scanner->depth == 0) {
        lexer->result_symbol = percent_string;
        return true;
      }

      int32_t c = lexer->lookahead;
      if (c == scanner->open_delimiter) {
        scanner->depth++;
      } else if (c == scanner->close_delimiter) {
        scanner->depth--;
      } else if (c == '#') {
        lexer->advance(lexer, false);
        if (lexer->lookahead == '{') {
          lexer->advance(lexer, false);
          lexer->result_symbol = percent_string_start;
          return true;
        }
        // NOTE(review): when '#' is not followed by '{', the advance at
        // the bottom of the loop also consumes the character after '#'
        // without testing it against the delimiters — kept as-is from
        // the original to preserve behavior.
      }

      lexer->advance(lexer, false);
    }
  }

  if (whitelist[percent_string_end]) {
    // Resume after an interpolation: the next character must be the
    // closing '}' of the `#{ ... }` section.
    if (lexer->lookahead != '}') return false;
    lexer->advance(lexer, false);

    // Continue balancing the delimiters saved from the earlier
    // percent_string_start scan until the string closes.
    for (;;) {
      if (scanner->depth == 0) {
        lexer->result_symbol = percent_string_end;
        return true;
      }

      int32_t c = lexer->lookahead;
      if (c == scanner->open_delimiter) {
        scanner->depth++;
      } else if (c == scanner->close_delimiter) {
        scanner->depth--;
      }

      lexer->advance(lexer, false);
    }
  }

  return false;
}
|
||||
|
||||
33
test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/corpus.txt
vendored
Normal file
33
test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
========================
|
||||
regexes
|
||||
========================
|
||||
|
||||
/a+/
|
||||
|
||||
---
|
||||
|
||||
(expression (regex))
|
||||
|
||||
========================
|
||||
conditionals
|
||||
========================
|
||||
|
||||
(if (1) /a+/)
|
||||
|
||||
---
|
||||
|
||||
(expression (parenthesized (expression (conditional
|
||||
(parenthesized (expression (number)))
|
||||
(expression (regex))))))
|
||||
|
||||
========================
|
||||
quotients
|
||||
========================
|
||||
|
||||
((1) / 2)
|
||||
|
||||
---
|
||||
|
||||
(expression (parenthesized (expression (quotient
|
||||
(expression (parenthesized (expression (number))))
|
||||
(expression (number))))))
|
||||
65
test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/grammar.json
vendored
Normal file
65
test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
{
|
||||
"name": "lexical_conflicts_due_to_state_merging",
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "conditional"},
|
||||
{"type": "SYMBOL", "name": "regex"},
|
||||
{"type": "SYMBOL", "name": "quotient"},
|
||||
{"type": "SYMBOL", "name": "number"},
|
||||
{"type": "SYMBOL", "name": "parenthesized"}
|
||||
]
|
||||
},
|
||||
|
||||
"conditional": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "if"},
|
||||
{"type": "SYMBOL", "name": "parenthesized"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"quotient": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "/"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"regex": {
|
||||
"type": "PATTERN",
|
||||
"value": "/[^/\n]+/"
|
||||
},
|
||||
|
||||
"number": {
|
||||
"type": "PATTERN",
|
||||
"value": "\\d+"
|
||||
},
|
||||
|
||||
"parenthesized": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "("},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": ")"}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
20
test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/readme.md
vendored
Normal file
20
test/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/readme.md
vendored
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
This grammar has two tokens, `regex` and `/`, which conflict: when a `/` character is encountered, the lexer can't tell if it is part of a `/` token or a `regex` by looking ahead only one character. But because these tokens are never valid in the same position, this doesn't cause any problem.
|
||||
|
||||
When merging similar parse states in order to reduce the size of the parse table, it is important that we avoid merging states in a way that causes these two tokens to both appear as valid lookahead symbols in a given state.
|
||||
|
||||
If we weren't careful, this grammar would cause that to happen, because a `regex` is valid in this state:
|
||||
|
||||
```
|
||||
(if (1) /\w+/)
|
||||
^
|
||||
```
|
||||
|
||||
and a `/` is valid in this state:
|
||||
|
||||
|
||||
```
|
||||
((1) / 2)
|
||||
^
|
||||
```
|
||||
|
||||
And these two states would otherwise be candidates for merging, because they both contain only the action `reduce(parenthesized, 3)`.
|
||||
15
test/fixtures/test_grammars/precedence_on_single_child_missing/expected_error.txt
vendored
Normal file
15
test/fixtures/test_grammars/precedence_on_single_child_missing/expected_error.txt
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
Unresolved conflict for symbol sequence:
|
||||
|
||||
identifier • '{' …
|
||||
|
||||
Possible interpretations:
|
||||
|
||||
1: (expression identifier) • '{' …
|
||||
2: (function_call identifier • block)
|
||||
|
||||
Possible resolutions:
|
||||
|
||||
1: Specify a higher precedence in `function_call` than in the other rules.
|
||||
2: Specify a higher precedence in `expression` than in the other rules.
|
||||
3: Specify a left or right associativity in `expression`
|
||||
4: Add a conflict for these rules: `expression` `function_call`
|
||||
63
test/fixtures/test_grammars/precedence_on_single_child_missing/grammar.json
vendored
Normal file
63
test/fixtures/test_grammars/precedence_on_single_child_missing/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
{
|
||||
"name": "precedence_on_single_child_missing",
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "function_call"},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
},
|
||||
|
||||
"function_call": {
|
||||
"type": "PREC_RIGHT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"block": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "{"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "}"}
|
||||
]
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
}
|
||||
15
test/fixtures/test_grammars/precedence_on_single_child_missing/readme.md
vendored
Normal file
15
test/fixtures/test_grammars/precedence_on_single_child_missing/readme.md
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
This language has function calls similar to Ruby's, with no parentheses required, and optional blocks.
|
||||
|
||||
There is a shift/reduce conflict here:
|
||||
|
||||
```
|
||||
foo bar { baz }
|
||||
^
|
||||
```
|
||||
|
||||
The possible actions are:
|
||||
|
||||
1. `reduce(expression, 1)` - `bar` is an expression being passed to the `foo` function.
|
||||
2. `shift` - `bar` is a function being called with the block `{ baz }`
|
||||
|
||||
The grammars `precedence_on_single_child_negative` and `precedence_on_single_child_positive` show possible resolutions to this conflict.
|
||||
12
test/fixtures/test_grammars/precedence_on_single_child_negative/corpus.txt
vendored
Normal file
12
test/fixtures/test_grammars/precedence_on_single_child_negative/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
===========================
|
||||
function calls with blocks
|
||||
===========================
|
||||
|
||||
foo bar { baz }
|
||||
|
||||
---
|
||||
|
||||
(expression (function_call
|
||||
(identifier)
|
||||
(expression (identifier))
|
||||
(block (expression (identifier)))))
|
||||
63
test/fixtures/test_grammars/precedence_on_single_child_negative/grammar.json
vendored
Normal file
63
test/fixtures/test_grammars/precedence_on_single_child_negative/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
{
|
||||
"name": "precedence_on_single_child_negative",
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "function_call"},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
},
|
||||
|
||||
"function_call": {
|
||||
"type": "PREC_RIGHT",
|
||||
"value": -1,
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"block": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "{"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "}"}
|
||||
]
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
}
|
||||
1
test/fixtures/test_grammars/precedence_on_single_child_negative/readme.md
vendored
Normal file
1
test/fixtures/test_grammars/precedence_on_single_child_negative/readme.md
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
This grammar resolves the conflict shown in the `precedence_on_single_child_missing` grammar by giving `function_call` a negative precedence. This causes reducing the `bar` variable to an expression to be preferred over shifting the `{` token as part of `function_call`.
|
||||
13
test/fixtures/test_grammars/precedence_on_single_child_positive/corpus.txt
vendored
Normal file
13
test/fixtures/test_grammars/precedence_on_single_child_positive/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
===========================
|
||||
function calls with blocks
|
||||
===========================
|
||||
|
||||
foo bar { baz }
|
||||
|
||||
---
|
||||
|
||||
(expression (function_call
|
||||
(identifier)
|
||||
(expression (function_call
|
||||
(identifier)
|
||||
(block (expression (identifier)))))))
|
||||
63
test/fixtures/test_grammars/precedence_on_single_child_positive/grammar.json
vendored
Normal file
63
test/fixtures/test_grammars/precedence_on_single_child_positive/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
{
|
||||
"name": "precedence_on_single_child_positive",
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "function_call"},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
},
|
||||
|
||||
"function_call": {
|
||||
"type": "PREC_RIGHT",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"block": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "{"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "}"}
|
||||
]
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
}
|
||||
1
test/fixtures/test_grammars/precedence_on_single_child_positive/readme.md
vendored
Normal file
1
test/fixtures/test_grammars/precedence_on_single_child_positive/readme.md
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
This grammar resolves the conflict shown in the `precedence_on_single_child_missing` grammar by giving `function_call` a positive precedence. This causes shifting the `{` token as part of `function_call` to be preferred over reducing the `bar` variable to an expression.
|
||||
24
test/fixtures/test_grammars/precedence_on_subsequence/corpus.txt
vendored
Normal file
24
test/fixtures/test_grammars/precedence_on_subsequence/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
==========================================
|
||||
curly brace blocks with high precedence
|
||||
==========================================
|
||||
|
||||
a b {}
|
||||
|
||||
---
|
||||
|
||||
(expression (function_call
|
||||
(identifier)
|
||||
(expression (function_call (identifier) (block)))))
|
||||
|
||||
==========================================
|
||||
do blocks with low precedence
|
||||
==========================================
|
||||
|
||||
a b do end
|
||||
|
||||
---
|
||||
|
||||
(expression (function_call
|
||||
(identifier)
|
||||
(expression (identifier))
|
||||
(do_block)))
|
||||
135
test/fixtures/test_grammars/precedence_on_subsequence/grammar.json
vendored
Normal file
135
test/fixtures/test_grammars/precedence_on_subsequence/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
{
|
||||
"name": "precedence_on_subsequence",
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "function_call"},
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "scope_resolution"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"function_call": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
"type": "PREC",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"type": "PREC",
|
||||
"value": -1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "do_block"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{
|
||||
"type": "PREC",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{
|
||||
"type": "PREC",
|
||||
"value": -1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "SYMBOL", "name": "do_block"}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"scope_resolution": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "::"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "::"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"block": {
|
||||
"type": "STRING",
|
||||
"value": "{}"
|
||||
},
|
||||
|
||||
"do_block": {
|
||||
"type": "STRING",
|
||||
"value": "do end"
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
}
|
||||
3
test/fixtures/test_grammars/readme.md
vendored
Normal file
3
test/fixtures/test_grammars/readme.md
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
These small grammars demonstrate specific features or test for certain specific regressions.
|
||||
|
||||
For some of them, compilation is expected to fail with a given error message. For others, the resulting parser is expected to produce certain trees.
|
||||
13
test/fixtures/test_grammars/readme_grammar/corpus.txt
vendored
Normal file
13
test/fixtures/test_grammars/readme_grammar/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
==================================
|
||||
the readme example
|
||||
==================================
|
||||
|
||||
a + b * c
|
||||
|
||||
---
|
||||
|
||||
(expression (sum
|
||||
(expression (variable))
|
||||
(expression (product
|
||||
(expression (variable))
|
||||
(expression (variable))))))
|
||||
67
test/fixtures/test_grammars/readme_grammar/grammar.json
vendored
Normal file
67
test/fixtures/test_grammars/readme_grammar/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
{
|
||||
"name": "readme_grammar",
|
||||
|
||||
// Things that can appear anywhere in the language, like comments
|
||||
// and whitespace, are expressed as 'extras'.
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"},
|
||||
{"type": "SYMBOL", "name": "comment"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
|
||||
// The first rule listed in the grammar becomes the 'start rule'.
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "sum"},
|
||||
{"type": "SYMBOL", "name": "product"},
|
||||
{"type": "SYMBOL", "name": "number"},
|
||||
{"type": "SYMBOL", "name": "variable"},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "("},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": ")"}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
// Tokens like '+' and '*' are described directly within the
|
||||
// grammar's rules, as opposed to in a separate lexer description.
|
||||
"sum": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "+"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
// Ambiguities can be resolved at compile time by assigning precedence
|
||||
// values to rule subtrees.
|
||||
"product": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 2,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "*"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
// Tokens can be specified using ECMAScript regexps.
|
||||
"number": {"type": "PATTERN", "value": "\\d+"},
|
||||
"comment": {"type": "PATTERN", "value": "#.*"},
|
||||
"variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
|
||||
}
|
||||
}
|
||||
7
test/fixtures/test_grammars/start_rule_is_blank/corpus.txt
vendored
Normal file
7
test/fixtures/test_grammars/start_rule_is_blank/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
========================
|
||||
the empty string
|
||||
=======================
|
||||
|
||||
---
|
||||
|
||||
(first_rule)
|
||||
6
test/fixtures/test_grammars/start_rule_is_blank/grammar.json
vendored
Normal file
6
test/fixtures/test_grammars/start_rule_is_blank/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"name": "start_rule_is_blank",
|
||||
"rules": {
|
||||
"first_rule": {"type": "BLANK"}
|
||||
}
|
||||
}
|
||||
6
test/fixtures/test_grammars/start_rule_is_token/corpus.txt
vendored
Normal file
6
test/fixtures/test_grammars/start_rule_is_token/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
===========================
|
||||
the single token
|
||||
==========================
|
||||
the-value
|
||||
---
|
||||
(first_rule)
|
||||
6
test/fixtures/test_grammars/start_rule_is_token/grammar.json
vendored
Normal file
6
test/fixtures/test_grammars/start_rule_is_token/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"name": "start_rule_is_token",
|
||||
"rules": {
|
||||
"first_rule": {"type": "STRING", "value": "the-value"}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue