Flesh out integration test for precedence within tokens

This commit is contained in:
Max Brunsfeld 2018-07-31 10:18:49 -07:00
parent 23e4596ec1
commit 6ebb9195b1
3 changed files with 94 additions and 18 deletions

View file

@ -1,22 +1,58 @@
==========================================
simple strings
obvious tokens
==========================================
// hi
/* hi */
hi
/
"hi"
---
(program (string))
==========================================
strings starting with double slashes
==========================================
// comment
"//not \t a \t comment"
/hi/
---
(program
(comment)
(string (escape_sequence) (escape_sequence)))
(comment)
(identifier)
(slash)
(string)
(regex))
==========================================
strings starting with double slashes
==========================================
/*
The lexer matches the string content correctly even though
a comment could match all the way until the end of the line,
because the string content token has a higher precedence
than the comment token.
*/
"//one\n//two"
---
(program
(comment)
(string (escape_sequence)))
==========================================
comments that resemble regexes
==========================================
/*
The lexer matches this as a comment followed by an identifier
even though a regex token could match the entire thing, because
the comment token has a higher precedence than the regex token.
*/
/* hello */ui
---
(program
(comment)
(comment)
(identifier))

View file

@ -10,14 +10,38 @@
"program": {
"type": "REPEAT",
"content": {
"type": "SYMBOL",
"name": "string"
"type": "CHOICE",
"members": [
{
"type": "SYMBOL",
"name": "string"
},
{
"type": "SYMBOL",
"name": "regex"
},
{
"type": "SYMBOL",
"name": "identifier"
},
{
"type": "SYMBOL",
"name": "slash"
}
]
}
},
"comment": {
"type": "PATTERN",
"value": "//.*"
"type": "TOKEN",
"content": {
"type": "PREC",
"value": 1,
"content": {
"type": "PATTERN",
"value": "//.*|/\\*[^*]*\\*/"
}
}
},
"string": {
@ -34,7 +58,7 @@
"type": "TOKEN",
"content": {
"type": "PREC",
"value": 1,
"value": 2,
"content": {
"type": "PATTERN",
"value": "[^\"\n\\\\]+"
@ -56,6 +80,21 @@
"escape_sequence": {
"type": "PATTERN",
"value": "\\\\."
},
"regex": {
"type": "PATTERN",
"value": "/[^/\n]+/[a-z]*"
},
"identifier": {
"type": "PATTERN",
"value": "[a-z]\\w*"
},
"slash": {
"type": "STRING",
"value": "/"
}
}
}

View file

@ -0,0 +1 @@
This grammar demonstrates how precedence behaves when it is used within a `TOKEN` rule. When more than one token could match at the same position, the token with the higher precedence is preferred, even if it matches a shorter string.