Merge pull request #188 from tree-sitter/token-precedence
Ensure that precedence is respected properly when used within tokens
This commit is contained in:
commit
f1821bb04d
5 changed files with 162 additions and 5 deletions
58
test/fixtures/test_grammars/precedence_on_token/corpus.txt
vendored
Normal file
58
test/fixtures/test_grammars/precedence_on_token/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
==========================================
|
||||
obvious tokens
|
||||
==========================================
|
||||
|
||||
// hi
|
||||
/* hi */
|
||||
hi
|
||||
/
|
||||
"hi"
|
||||
/hi/
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(comment)
|
||||
(comment)
|
||||
(identifier)
|
||||
(slash)
|
||||
(string)
|
||||
(regex))
|
||||
|
||||
==========================================
|
||||
strings starting with double slashes
|
||||
==========================================
|
||||
|
||||
/*
|
||||
The lexer matches the string content correctly even though
|
||||
a comment could match all the way until the end of the line,
|
||||
because the string content token has a higher precedence
|
||||
than the comment token.
|
||||
*/
|
||||
|
||||
"//one\n//two"
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(comment)
|
||||
(string (escape_sequence)))
|
||||
|
||||
==========================================
|
||||
comments that resemble regexes
|
||||
==========================================
|
||||
|
||||
/*
|
||||
The lexer matches this as a comment followed by an identifier
|
||||
even though a regex token could match the entire thing, because
|
||||
the comment token has a higher precedence than the regex token
|
||||
*/
|
||||
|
||||
/* hello */ui
|
||||
|
||||
---
|
||||
|
||||
(program
|
||||
(comment)
|
||||
(comment)
|
||||
(identifier))
|
||||
100
test/fixtures/test_grammars/precedence_on_token/grammar.json
vendored
Normal file
100
test/fixtures/test_grammars/precedence_on_token/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
{
|
||||
"name": "precedence_on_token",
|
||||
|
||||
"extras": [
|
||||
{"type": "SYMBOL", "name": "comment"},
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"program": {
|
||||
"type": "REPEAT",
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "string"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "regex"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "identifier"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "slash"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"comment": {
|
||||
"type": "TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "//.*|/\\*[^*]*\\*/"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
"string": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "\""},
|
||||
|
||||
{
|
||||
"type": "REPEAT",
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "TOKEN",
|
||||
"content": {
|
||||
"type": "PREC",
|
||||
"value": 2,
|
||||
"content": {
|
||||
"type": "PATTERN",
|
||||
"value": "[^\"\n\\\\]+"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "escape_sequence"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
{"type": "STRING", "value": "\""}
|
||||
]
|
||||
},
|
||||
|
||||
"escape_sequence": {
|
||||
"type": "PATTERN",
|
||||
"value": "\\\\."
|
||||
},
|
||||
|
||||
"regex": {
|
||||
"type": "PATTERN",
|
||||
"value": "/[^/\n]+/[a-z]*"
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-z]\\w*"
|
||||
},
|
||||
|
||||
"slash": {
|
||||
"type": "STRING",
|
||||
"value": "/"
|
||||
}
|
||||
}
|
||||
}
|
||||
1
test/fixtures/test_grammars/precedence_on_token/readme.md
vendored
Normal file
1
test/fixtures/test_grammars/precedence_on_token/readme.md
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
This grammar shows the behavior of precedence used within a `TOKEN` rule. Tokens with higher precedence are preferred, even if they match a shorter string.
|
||||
|
|
@ -9,8 +9,6 @@
|
|||
|
||||
START_TEST
|
||||
|
||||
if (TREE_SITTER_SEED == -1) return;
|
||||
|
||||
string grammars_dir_path = join_path({"test", "fixtures", "test_grammars"});
|
||||
vector<string> test_languages = list_directory(grammars_dir_path);
|
||||
|
||||
|
|
|
|||
|
|
@ -172,9 +172,9 @@ describe("Parser", [&]() {
|
|||
|
||||
describe("when there is an unterminated error", [&]() {
|
||||
it("maintains a consistent tree", [&]() {
|
||||
ts_parser_set_language(parser, load_real_language("javascript"));
|
||||
set_text("a; ' this string never ends");
|
||||
assert_root_node("(program (expression_statement (identifier)) (ERROR (UNEXPECTED EOF)))");
|
||||
ts_parser_set_language(parser, load_real_language("json"));
|
||||
set_text("nul");
|
||||
assert_root_node("(ERROR (UNEXPECTED EOF))");
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue