diff --git a/test/fixtures/test_grammars/precedence_on_token/corpus.txt b/test/fixtures/test_grammars/precedence_on_token/corpus.txt index 8c8fd541..a37440bb 100644 --- a/test/fixtures/test_grammars/precedence_on_token/corpus.txt +++ b/test/fixtures/test_grammars/precedence_on_token/corpus.txt @@ -1,22 +1,58 @@ ========================================== -simple strings +obvious tokens ========================================== +// hi +/* hi */ +hi +/ "hi" - ---- - -(program (string)) - -========================================== -strings starting with double slashes -========================================== - -// comment -"//not \t a \t comment" +/hi/ --- (program (comment) - (string (escape_sequence) (escape_sequence))) + (comment) + (identifier) + (slash) + (string) + (regex)) + +========================================== +strings starting with double slashes +========================================== + +/* +The lexer matches the string content correctly even though +a comment could match all the way until the end of the line, +because the string content token has a higher precedence +than the comment token. +*/ + +"//one\n//two" + +--- + +(program + (comment) + (string (escape_sequence))) + +========================================== +comments that resemble regexes +========================================== + +/* +The lexer matches this as a comment followed by an identifier +even though a regex token could match the entire thing, because +the comment token has a higher precedence than the regex token +*/ + +/* hello */ui + +--- + +(program + (comment) + (comment) + (identifier)) diff --git a/test/fixtures/test_grammars/precedence_on_token/grammar.json b/test/fixtures/test_grammars/precedence_on_token/grammar.json index d9557add..1b1ef7ea 100644 --- a/test/fixtures/test_grammars/precedence_on_token/grammar.json +++ b/test/fixtures/test_grammars/precedence_on_token/grammar.json @@ -10,14 +10,38 @@ "program": { "type": "REPEAT", "content": { - "type": "SYMBOL", - "name": "string" + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "string" + }, + { + "type": "SYMBOL", + "name": "regex" + }, + { + "type": "SYMBOL", + "name": "identifier" + }, + { + "type": "SYMBOL", + "name": "slash" + } + ] } }, "comment": { - "type": "PATTERN", - "value": "//.*" + "type": "TOKEN", + "content": { + "type": "PREC", + "value": 1, + "content": { + "type": "PATTERN", + "value": "//.*|/\\*[^*]*\\*/" + } + } }, "string": { @@ -34,7 +58,7 @@ "type": "TOKEN", "content": { "type": "PREC", - "value": 1, + "value": 2, "content": { "type": "PATTERN", "value": "[^\"\n\\\\]+" @@ -56,6 +80,21 @@ "escape_sequence": { "type": "PATTERN", "value": "\\\\." + }, + + "regex": { + "type": "PATTERN", + "value": "/[^/\n]+/[a-z]*" + }, + + "identifier": { + "type": "PATTERN", + "value": "[a-z]\\w*" + }, + + "slash": { + "type": "STRING", + "value": "/" } } } diff --git a/test/fixtures/test_grammars/precedence_on_token/readme.md b/test/fixtures/test_grammars/precedence_on_token/readme.md new file mode 100644 index 00000000..354d70e7 --- /dev/null +++ b/test/fixtures/test_grammars/precedence_on_token/readme.md @@ -0,0 +1 @@ +This grammar shows the behavior of precedence used within a `TOKEN` rule. Tokens with higher precedence are preferred, even if they match a shorter string.