Flesh out integration test for precedence within tokens

2018-07-31 10:18:49 -07:00 · 2018-07-31 10:18:49 -07:00 · 6ebb9195b1
commit 6ebb9195b1
parent 23e4596ec1
3 changed files with 94 additions and 18 deletions
--- a/test/fixtures/test_grammars/precedence_on_token/corpus.txt
+++ b/test/fixtures/test_grammars/precedence_on_token/corpus.txt
@@ -1,22 +1,58 @@
 ==========================================
-simple strings
+obvious tokens
 ==========================================

+// hi
+/* hi */
+hi
+/
 "hi"
-
---
-
-(program (string))
-
-==========================================
-strings starting with double slashes
-==========================================
-
-// comment
-"//not \t a \t comment"
+/hi/

 ---

 (program
  (comment)
-  (string (escape_sequence) (escape_sequence)))
+  (comment)
+  (identifier)
+  (slash)
+  (string)
+  (regex))
+
+==========================================
+strings starting with double slashes
+==========================================
+
+/*
+The lexer matches the string content correctly even though
+a comment could match all the way until the end of the line,
+because the string content token has a higher precedence
+than the comment token.
+*/
+
+"//one\n//two"
+
+---
+
+(program
+  (comment)
+  (string (escape_sequence)))
+
+==========================================
+comments that resemble regexes
+==========================================
+
+/*
+The lexer matches this as a comment followed by an identifier
+even though a regex token could match the entire thing, because
+the comment token has a higher precedence than the regex token
+*/
+
+/* hello */ui
+
+---
+
+(program
+  (comment)
+  (comment)
+  (identifier))
--- a/test/fixtures/test_grammars/precedence_on_token/grammar.json
+++ b/test/fixtures/test_grammars/precedence_on_token/grammar.json
@@ -10,14 +10,38 @@
    "program": {
      "type": "REPEAT",
      "content": {
-        "type": "SYMBOL",
-        "name": "string"
+        "type": "CHOICE",
+        "members": [
+          {
+            "type": "SYMBOL",
+            "name": "string"
+          },
+          {
+            "type": "SYMBOL",
+            "name": "regex"
+          },
+          {
+            "type": "SYMBOL",
+            "name": "identifier"
+          },
+          {
+            "type": "SYMBOL",
+            "name": "slash"
+          }
+        ]
      }
    },

    "comment": {
-      "type": "PATTERN",
-      "value": "//.*"
+      "type": "TOKEN",
+      "content": {
+        "type": "PREC",
+        "value": 1,
+        "content": {
+          "type": "PATTERN",
+          "value": "//.*|/\\*[^*]*\\*/"
+        }
+      }
    },

    "string": {
@@ -34,7 +58,7 @@
                "type": "TOKEN",
                "content": {
                  "type": "PREC",
-                  "value": 1,
+                  "value": 2,
                  "content": {
                    "type": "PATTERN",
                    "value": "[^\"\n\\\\]+"
@@ -56,6 +80,21 @@
    "escape_sequence": {
      "type": "PATTERN",
      "value": "\\\\."
+    },
+
+    "regex": {
+      "type": "PATTERN",
+      "value": "/[^/\n]+/[a-z]*"
+    },
+
+    "identifier": {
+      "type": "PATTERN",
+      "value": "[a-z]\\w*"
+    },
+
+    "slash": {
+      "type": "STRING",
+      "value": "/"
    }
  }
 }
--- a/test/fixtures/test_grammars/precedence_on_token/readme.md
+++ b/test/fixtures/test_grammars/precedence_on_token/readme.md
@@ -0,0 +1 @@
+This grammar shows the behavior of precedence used within a `TOKEN` rule. Tokens with higher precedence are preferred, even if they match a shorter string.
				`@@ -0,0 +1 @@`
				This grammar shows the behavior of precedence used within a `TOKEN` rule. Tokens with higher precedence are preferred, even if they match a shorter string.