From 23e4596ec1d7de813fe88d05eca072d0f8249dfc Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Thu, 26 Jul 2018 17:06:09 -0700
Subject: [PATCH 1/3] Add test for handling of precedence within tokens

---
 .../precedence_on_token/corpus.txt            | 22 +++++++
 .../precedence_on_token/grammar.json          | 61 +++++++++++++++++++
 test/integration/test_grammars.cc             |  2 -
 3 files changed, 83 insertions(+), 2 deletions(-)
 create mode 100644 test/fixtures/test_grammars/precedence_on_token/corpus.txt
 create mode 100644 test/fixtures/test_grammars/precedence_on_token/grammar.json
diff --git a/test/fixtures/test_grammars/precedence_on_token/corpus.txt b/test/fixtures/test_grammars/precedence_on_token/corpus.txt
new file mode 100644
index 00000000..8c8fd541
--- /dev/null
+++ b/test/fixtures/test_grammars/precedence_on_token/corpus.txt
@@ -0,0 +1,22 @@
+==========================================
+simple strings
+==========================================
+
+"hi"
+
+---
+
+(program (string))
+
+==========================================
+strings starting with double slashes
+==========================================
+
+// comment
+"//not \t a \t comment"
+
+---
+
+(program
+  (comment)
+  (string (escape_sequence) (escape_sequence)))
diff --git a/test/fixtures/test_grammars/precedence_on_token/grammar.json b/test/fixtures/test_grammars/precedence_on_token/grammar.json
new file mode 100644
index 00000000..d9557add
--- /dev/null
+++ b/test/fixtures/test_grammars/precedence_on_token/grammar.json
@@ -0,0 +1,61 @@
+{
+  "name": "precedence_on_token",
+
+  "extras": [
+    {"type": "SYMBOL", "name": "comment"},
+    {"type": "PATTERN", "value": "\\s"},
+  ],
+
+  "rules": {
+    "program": {
+      "type": "REPEAT",
+      "content": {
+        "type": "SYMBOL",
+        "name": "string"
+      }
+    },
+
+    "comment": {
+      "type": "PATTERN",
+      "value": "//.*"
+    },
+
+    "string": {
+      "type": "SEQ",
+      "members": [
+        {"type": "STRING", "value": "\""},
+
+        {
+          "type": "REPEAT",
+          "content": {
+            "type": "CHOICE",
+            "members": [
+              {
+                "type": "TOKEN",
+                "content": {
+                  "type": "PREC",
+                  "value": 1,
+                  "content": {
+                    "type": "PATTERN",
+                    "value": "[^\"\n\\\\]+"
+                  }
+                }
+              },
+              {
+                "type": "SYMBOL",
+                "name": "escape_sequence"
+              }
+            ]
+          }
+        },
+
+        {"type": "STRING", "value": "\""}
+      ]
+    },
+
+    "escape_sequence": {
+      "type": "PATTERN",
+      "value": "\\\\."
+    }
+  }
+}
diff --git a/test/integration/test_grammars.cc b/test/integration/test_grammars.cc
index 3741a3c9..d10523ae 100644
--- a/test/integration/test_grammars.cc
+++ b/test/integration/test_grammars.cc
@@ -9,8 +9,6 @@
 
 START_TEST
 
-if (TREE_SITTER_SEED == -1) return;
-
 string grammars_dir_path = join_path({"test", "fixtures", "test_grammars"});
 vector<string> test_languages = list_directory(grammars_dir_path);
 

From 6ebb9195b11e118e738e5529b4b894004a42d501 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Tue, 31 Jul 2018 10:18:49 -0700
Subject: [PATCH 2/3] Flesh out integration test for precedence within tokens

---
 .../precedence_on_token/corpus.txt            | 62 +++++++++++++++----
 .../precedence_on_token/grammar.json          | 49 +++++++++++++--
 .../precedence_on_token/readme.md             |  1 +
 3 files changed, 94 insertions(+), 18 deletions(-)
 create mode 100644 test/fixtures/test_grammars/precedence_on_token/readme.md

diff --git a/test/fixtures/test_grammars/precedence_on_token/corpus.txt b/test/fixtures/test_grammars/precedence_on_token/corpus.txt
index 8c8fd541..a37440bb 100644
--- a/test/fixtures/test_grammars/precedence_on_token/corpus.txt
+++ b/test/fixtures/test_grammars/precedence_on_token/corpus.txt
@@ -1,22 +1,58 @@
 ==========================================
-simple strings
+obvious tokens
 ==========================================
 
+// hi
+/* hi */
+hi
+/
 "hi"
-
----
-
-(program (string))
-
-==========================================
-strings starting with double slashes
-==========================================
-
-// comment
-"//not \t a \t comment"
+/hi/
 
 ---
 
 (program
   (comment)
-  (string (escape_sequence) (escape_sequence)))
+  (comment)
+  (identifier)
+  (slash)
+  (string)
+  (regex))
+
+==========================================
+strings starting with double slashes
+==========================================
+
+/*
+The lexer matches the string content correctly even though
+a comment could match all the way until the end of the line,
+because the string content token has a higher precedence
+than the comment token.
+*/
+
+"//one\n//two"
+
+---
+
+(program
+  (comment)
+  (string (escape_sequence)))
+
+==========================================
+comments that resemble regexes
+==========================================
+
+/*
+The lexer matches this as a comment followed by an identifier
+even though a regex token could match the entire thing, because
+the comment token has a higher precedence than the regex token.
+*/
+
+/* hello */ui
+
+---
+
+(program
+  (comment)
+  (comment)
+  (identifier))
diff --git a/test/fixtures/test_grammars/precedence_on_token/grammar.json b/test/fixtures/test_grammars/precedence_on_token/grammar.json
index d9557add..1b1ef7ea 100644
--- a/test/fixtures/test_grammars/precedence_on_token/grammar.json
+++ b/test/fixtures/test_grammars/precedence_on_token/grammar.json
@@ -10,14 +10,38 @@
     "program": {
       "type": "REPEAT",
       "content": {
-        "type": "SYMBOL",
-        "name": "string"
+        "type": "CHOICE",
+        "members": [
+          {
+            "type": "SYMBOL",
+            "name": "string"
+          },
+          {
+            "type": "SYMBOL",
+            "name": "regex"
+          },
+          {
+            "type": "SYMBOL",
+            "name": "identifier"
+          },
+          {
+            "type": "SYMBOL",
+            "name": "slash"
+          }
+        ]
       }
     },
 
     "comment": {
-      "type": "PATTERN",
-      "value": "//.*"
+      "type": "TOKEN",
+      "content": {
+        "type": "PREC",
+        "value": 1,
+        "content": {
+          "type": "PATTERN",
+          "value": "//.*|/\\*[^*]*\\*/"
+        }
+      }
     },
 
     "string": {
@@ -34,7 +58,7 @@
                 "type": "TOKEN",
                 "content": {
                   "type": "PREC",
-                  "value": 1,
+                  "value": 2,
                   "content": {
                     "type": "PATTERN",
                     "value": "[^\"\n\\\\]+"
@@ -56,6 +80,21 @@
     "escape_sequence": {
       "type": "PATTERN",
       "value": "\\\\."
+    },
+
+    "regex": {
+      "type": "PATTERN",
+      "value": "/[^/\n]+/[a-z]*"
+    },
+
+    "identifier": {
+      "type": "PATTERN",
+      "value": "[a-z]\\w*"
+    },
+
+    "slash": {
+      "type": "STRING",
+      "value": "/"
     }
   }
 }
diff --git a/test/fixtures/test_grammars/precedence_on_token/readme.md b/test/fixtures/test_grammars/precedence_on_token/readme.md
new file mode 100644
index 00000000..354d70e7
--- /dev/null
+++ b/test/fixtures/test_grammars/precedence_on_token/readme.md
@@ -0,0 +1 @@
+This grammar shows the behavior of precedence used within a `TOKEN` rule. Tokens with higher precedence are preferred, even if they match a shorter string.

From 714fda917a00a16e50b17a85074114aace745070 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Tue, 31 Jul 2018 11:50:09 -0700
Subject: [PATCH 3/3] Update test now that JS strings are parsed differently

---
 test/runtime/parser_test.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc
index 5bd3fb2d..9a354252 100644
--- a/test/runtime/parser_test.cc
+++ b/test/runtime/parser_test.cc
@@ -172,9 +172,9 @@ describe("Parser", [&]() {
 
     describe("when there is an unterminated error", [&]() {
       it("maintains a consistent tree", [&]() {
-        ts_parser_set_language(parser, load_real_language("javascript"));
-        set_text("a; ' this string never ends");
-        assert_root_node("(program (expression_statement (identifier)) (ERROR (UNEXPECTED EOF)))");
+        ts_parser_set_language(parser, load_real_language("json"));
+        set_text("nul");
+        assert_root_node("(ERROR (UNEXPECTED EOF))");
       });
     });