diff --git a/test/fixtures/test_grammars/precedence_on_token/corpus.txt b/test/fixtures/test_grammars/precedence_on_token/corpus.txt new file mode 100644 index 00000000..a37440bb --- /dev/null +++ b/test/fixtures/test_grammars/precedence_on_token/corpus.txt @@ -0,0 +1,58 @@ +========================================== +obvious tokens +========================================== + +// hi +/* hi */ +hi +/ +"hi" +/hi/ + +--- + +(program + (comment) + (comment) + (identifier) + (slash) + (string) + (regex)) + +========================================== +strings starting with double slashes +========================================== + +/* +The lexer matches the string content correctly even though +a comment could match all the way until the end of the line, +because the string content token has a higher precedence +than the comment token. +*/ + +"//one\n//two" + +--- + +(program + (comment) + (string (escape_sequence))) + +========================================== +comments that resemble regexes +========================================== + +/* +The lexer matches this as a comment followed by an identifier +even though a regex token could match the entire thing, because +the comment token has a higher precedence than the regex token. +*/ + +/* hello */ui + +--- + +(program + (comment) + (comment) + (identifier)) diff --git a/test/fixtures/test_grammars/precedence_on_token/grammar.json b/test/fixtures/test_grammars/precedence_on_token/grammar.json new file mode 100644 index 00000000..1b1ef7ea --- /dev/null +++ b/test/fixtures/test_grammars/precedence_on_token/grammar.json @@ -0,0 +1,100 @@ +{ + "name": "precedence_on_token", + + "extras": [ + {"type": "SYMBOL", "name": "comment"}, + {"type": "PATTERN", "value": "\\s"}, + ], + + "rules": { + "program": { + "type": "REPEAT", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "string" + }, + { + "type": "SYMBOL", + "name": "regex" + }, + { + "type": "SYMBOL", + "name": "identifier" + }, + { 
+ "type": "SYMBOL", + "name": "slash" + } + ] + } + }, + + "comment": { + "type": "TOKEN", + "content": { + "type": "PREC", + "value": 1, + "content": { + "type": "PATTERN", + "value": "//.*|/\\*[^*]*\\*/" + } + } + }, + + "string": { + "type": "SEQ", + "members": [ + {"type": "STRING", "value": "\""}, + + { + "type": "REPEAT", + "content": { + "type": "CHOICE", + "members": [ + { + "type": "TOKEN", + "content": { + "type": "PREC", + "value": 2, + "content": { + "type": "PATTERN", + "value": "[^\"\n\\\\]+" + } + } + }, + { + "type": "SYMBOL", + "name": "escape_sequence" + } + ] + } + }, + + {"type": "STRING", "value": "\""} + ] + }, + + "escape_sequence": { + "type": "PATTERN", + "value": "\\\\." + }, + + "regex": { + "type": "PATTERN", + "value": "/[^/\n]+/[a-z]*" + }, + + "identifier": { + "type": "PATTERN", + "value": "[a-z]\\w*" + }, + + "slash": { + "type": "STRING", + "value": "/" + } + } +} diff --git a/test/fixtures/test_grammars/precedence_on_token/readme.md b/test/fixtures/test_grammars/precedence_on_token/readme.md new file mode 100644 index 00000000..354d70e7 --- /dev/null +++ b/test/fixtures/test_grammars/precedence_on_token/readme.md @@ -0,0 +1 @@ +This grammar shows the behavior of precedence used within a `TOKEN` rule. Tokens with higher precedence are preferred, even if they match a shorter string. 
diff --git a/test/integration/test_grammars.cc b/test/integration/test_grammars.cc index 3741a3c9..d10523ae 100644 --- a/test/integration/test_grammars.cc +++ b/test/integration/test_grammars.cc @@ -9,8 +9,6 @@ START_TEST -if (TREE_SITTER_SEED == -1) return; - string grammars_dir_path = join_path({"test", "fixtures", "test_grammars"}); vector test_languages = list_directory(grammars_dir_path); diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc index 5bd3fb2d..9a354252 100644 --- a/test/runtime/parser_test.cc +++ b/test/runtime/parser_test.cc @@ -172,9 +172,9 @@ describe("Parser", [&]() { describe("when there is an unterminated error", [&]() { it("maintains a consistent tree", [&]() { - ts_parser_set_language(parser, load_real_language("javascript")); - set_text("a; ' this string never ends"); - assert_root_node("(program (expression_statement (identifier)) (ERROR (UNEXPECTED EOF)))"); + ts_parser_set_language(parser, load_real_language("json")); + set_text("nul"); + assert_root_node("(ERROR (UNEXPECTED EOF))"); }); });