Restructure integration tests to use separate JSON and corpus files

This makes these tests way easier to write and read.
This commit is contained in:
Max Brunsfeld 2017-03-09 11:49:30 -08:00
parent f049d5d94c
commit ac4167fdc9
54 changed files with 1424 additions and 1211 deletions

View file

@ -1,42 +0,0 @@
// External scanner for the `extra_external_tokens` test grammar.
// It recognizes a single external token: a `#`-to-end-of-line comment.
#include <tree_sitter/parser.h>
// Indices into this grammar's `externals` array.
enum {
COMMENT,
};
// The scanner keeps no state, so the payload is always NULL.
void *tree_sitter_extra_external_tokens_external_scanner_create() {
return NULL;
}
// Stateless scanner: nothing to reset.
void tree_sitter_extra_external_tokens_external_scanner_reset(void *payload) {
}
// Stateless scanner: report success with an empty serialization.
bool tree_sitter_extra_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) {
return true;
}
// Stateless scanner: nothing to restore.
void tree_sitter_extra_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
}
// Lex one COMMENT token: skip leading spaces, then consume `#` through
// the next newline. Returns true iff a COMMENT was produced.
bool tree_sitter_extra_external_tokens_external_scanner_scan(
void *payload, TSLexer *lexer, const bool *whitelist) {
// Leading spaces are advanced past with the second argument set to true
// (presumably marking them as skipped whitespace — matches usage below).
while (lexer->lookahead == ' ') {
lexer->advance(lexer, true);
}
if (lexer->lookahead == '#') {
lexer->advance(lexer, false);
// NOTE(review): assumes every comment is terminated by '\n'; a comment
// at end of input (lookahead == 0) would spin here — confirm inputs.
while (lexer->lookahead != '\n') {
lexer->advance(lexer, false);
}
lexer->result_symbol = COMMENT;
return true;
}
return false;
}
// create() allocated nothing, so there is nothing to free.
void tree_sitter_extra_external_tokens_external_scanner_destroy(void *payload) {
}

View file

@ -0,0 +1,32 @@
================================================
anonymous tokens defined with character classes
================================================
1234
---
(first_rule)
=================================================
anonymous tokens defined with LF escape sequence
=================================================
---
(first_rule)
=================================================
anonymous tokens defined with CR escape sequence
=================================================
---
(first_rule)
================================================
anonymous tokens with quotes
================================================
'hello'
---
(first_rule)

View file

@ -0,0 +1,14 @@
{
"name": "anonymous_tokens_with_escaped_chars",
"rules": {
"first_rule": {
"type": "CHOICE",
"members": [
{"type": "STRING", "value": "\n"},
{"type": "STRING", "value": "\r"},
{"type": "STRING", "value": "'hello'"},
{"type": "PATTERN", "value": "\\d+"}
]
}
}
}

View file

@ -0,0 +1,8 @@
===================
chained operations
===================
x+y+z
---
(expression (math_operation
(expression (math_operation (expression (identifier)) (expression (identifier))))
(expression (identifier))))

View file

@ -0,0 +1,31 @@
{
"name": "associativity_left",
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "math_operation"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"math_operation": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
}

View file

@ -0,0 +1,13 @@
Unresolved conflict for symbol sequence:
expression '+' expression • '+' …
Possible interpretations:
1: (math_operation expression '+' expression) • '+' …
2: expression '+' (math_operation expression • '+' expression)
Possible resolutions:
1: Specify a left or right associativity in `math_operation`
2: Add a conflict for these rules: `math_operation`

View file

@ -0,0 +1,27 @@
{
"name": "associativity_missing",
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "math_operation"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"math_operation": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
}

View file

@ -0,0 +1,8 @@
===================
chained operations
===================
x+y+z
---
(expression (math_operation
(expression (identifier))
(expression (math_operation (expression (identifier)) (expression (identifier))))))

View file

@ -0,0 +1,31 @@
{
"name": "associativity_right",
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "math_operation"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"math_operation": {
"type": "PREC_RIGHT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
}

View file

@ -0,0 +1,15 @@
Unresolved conflict for symbol sequence:
expression '+' expression • '*' …
Possible interpretations:
1: (sum expression '+' expression) • '*' …
2: expression '+' (product expression • '*' expression)
3: expression '+' (other_thing expression • '*' '*')
Possible resolutions:
1: Specify a higher precedence in `product` and `other_thing` than in the other rules.
2: Specify a higher precedence in `sum` than in the other rules.
3: Add a conflict for these rules: `sum` `product` `other_thing`

View file

@ -0,0 +1,58 @@
{
"name": "conflicting_precedence",
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "sum"},
{"type": "SYMBOL", "name": "product"},
{"type": "SYMBOL", "name": "other_thing"}
]
},
"sum": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"product": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "*"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"other_thing": {
"type": "PREC_LEFT",
"value": -1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "*"},
{"type": "STRING", "value": "*"}
]
}
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
}

View file

@ -0,0 +1,2 @@
The rule `rule_2` matches the empty string.
Tree-sitter currently does not support syntactic rules that match the empty string.

View file

@ -0,0 +1,15 @@
{
"name": "epsilon_rules",
"rules": {
"rule_1": {"type": "SYMBOL", "name": "rule_2"},
"rule_2": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "rule_1"},
{"type": "BLANK"}
]
}
}
}

View file

@ -0,0 +1,41 @@
=========================================
single-line statements - internal tokens
=========================================
a b
---
(statement (variable) (variable) (line_break))
=========================================
multi-line statements - internal tokens
=========================================
a
b
---
(statement (variable) (variable) (line_break))
=========================================
single-line statements - external tokens
=========================================
'hello' 'world'
---
(statement (string) (string) (line_break))
=========================================
multi-line statements - external tokens
=========================================
'hello'
'world'
---
(statement (string) (string) (line_break))

View file

@ -0,0 +1,36 @@
{
"name": "external_and_internal_tokens",
"externals": [
"string",
"line_break"
],
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"statement": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "_expression"},
{"type": "SYMBOL", "name": "_expression"},
{"type": "SYMBOL", "name": "line_break"}
]
},
"_expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "string"},
{"type": "SYMBOL", "name": "variable"},
{"type": "SYMBOL", "name": "number"}
]
},
"variable": {"type": "PATTERN", "value": "\\a+"},
"number": {"type": "PATTERN", "value": "\\d+"},
"line_break": {"type": "STRING", "value": "\n"}
}
}

View file

@ -0,0 +1 @@
This grammar has an external scanner whose `scan` method needs to be able to check for the validity of an *internal* token. This is done by including the name of that internal token (`line_break`) in the grammar's `externals` field.

View file

@ -1,4 +1,3 @@
#include <stdbool.h>
#include <tree_sitter/parser.h>
enum {
@ -6,21 +5,17 @@ enum {
LINE_BREAK
};
void *tree_sitter_shared_external_tokens_external_scanner_create() {
return NULL;
}
void *tree_sitter_external_and_internal_tokens_external_scanner_create() { return NULL; }
void tree_sitter_shared_external_tokens_external_scanner_reset(void *payload) {
}
void tree_sitter_external_and_internal_tokens_external_scanner_destroy(void *payload) {}
bool tree_sitter_shared_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) {
return true;
}
void tree_sitter_external_and_internal_tokens_external_scanner_reset(void *payload) {}
void tree_sitter_shared_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
}
bool tree_sitter_external_and_internal_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) { return true; }
bool tree_sitter_shared_external_tokens_external_scanner_scan(
void tree_sitter_external_and_internal_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {}
bool tree_sitter_external_and_internal_tokens_external_scanner_scan(
void *payload, TSLexer *lexer, const bool *whitelist) {
// If a line-break is a valid lookahead token, only skip spaces.
@ -58,6 +53,3 @@ bool tree_sitter_shared_external_tokens_external_scanner_scan(
return false;
}
void tree_sitter_shared_external_tokens_external_scanner_destroy(void *payload) {
}

View file

@ -0,0 +1,10 @@
========================
extra external tokens
========================
x = # a comment
y
---
(assignment (variable) (comment) (variable))

View file

@ -0,0 +1,25 @@
{
"name": "external_extra_tokens",
"externals": [
"comment"
],
"extras": [
{"type": "PATTERN", "value": "\\s"},
{"type": "SYMBOL", "name": "comment"}
],
"rules": {
"assignment": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "variable"},
{"type": "STRING", "value": "="},
{"type": "SYMBOL", "name": "variable"}
]
},
"variable": {"type": "PATTERN", "value": "\\a+"}
}
}

View file

@ -0,0 +1,36 @@
// External scanner for the `external_extra_tokens` test grammar. It
// recognizes one external token: a `#`-to-end-of-line comment, which the
// grammar lists among its `extras`.
#include <tree_sitter/parser.h>
// Indices into this grammar's `externals` array.
enum {
COMMENT,
};
// The scanner keeps no state, so all lifecycle hooks are no-ops.
void *tree_sitter_external_extra_tokens_external_scanner_create() { return NULL; }
void tree_sitter_external_extra_tokens_external_scanner_destroy(void *payload) {}
void tree_sitter_external_extra_tokens_external_scanner_reset(void *payload) {}
// No state to save: report success with an empty serialization.
bool tree_sitter_external_extra_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) { return true; }
void tree_sitter_external_extra_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {}
// Lex one COMMENT token: skip leading spaces, then consume `#` through the
// end of the line. Returns true (with result_symbol set) iff a COMMENT was
// produced; returns false to let the internal lexer run otherwise.
//
// Fix: the inner loop previously tested only for '\n', so a comment that
// reached end of input (lookahead == 0) would never terminate. Guard on the
// NUL lookahead as well so a trailing comment ends the token at EOF.
bool tree_sitter_external_extra_tokens_external_scanner_scan(
void *payload, TSLexer *lexer, const bool *whitelist) {
// Advance past leading spaces, marking them as skipped.
while (lexer->lookahead == ' ') {
lexer->advance(lexer, true);
}
if (lexer->lookahead == '#') {
lexer->advance(lexer, false);
// Consume to the end of the line, stopping at end of input too.
while (lexer->lookahead != '\n' && lexer->lookahead != 0) {
lexer->advance(lexer, false);
}
lexer->result_symbol = COMMENT;
return true;
}
return false;
}

View file

@ -0,0 +1,22 @@
========================
simple external tokens
=========================
x + %(sup (external) scanner?)
---
(expression (sum (expression (identifier)) (expression (string))))
==================================
external tokens that require state
==================================
%{sup {} #{x + y} {} scanner?}
---
(expression (string
(expression (sum
(expression (identifier))
(expression (identifier))))))

View file

@ -0,0 +1,57 @@
{
"name": "external_tokens",
"externals": [
"_percent_string",
"_percent_string_start",
"_percent_string_end"
],
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "string"},
{"type": "SYMBOL", "name": "sum"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"sum": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"string": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "_percent_string"},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "_percent_string_start"},
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "_percent_string_end"}
]
}
]
},
"identifier": {
"type": "PATTERN",
"value": "\\a+"
}
}
}

View file

@ -1,4 +1,3 @@
#include <stdbool.h>
#include <tree_sitter/parser.h>
enum {
@ -13,7 +12,7 @@ typedef struct {
uint32_t depth;
} Scanner;
void *tree_sitter_external_scanner_example_external_scanner_create() {
void *tree_sitter_external_tokens_external_scanner_create() {
Scanner *scanner = malloc(sizeof(Scanner));
*scanner = (Scanner){
.open_delimiter = 0,
@ -23,7 +22,17 @@ void *tree_sitter_external_scanner_example_external_scanner_create() {
return scanner;
}
bool tree_sitter_external_scanner_example_external_scanner_scan(
void tree_sitter_external_tokens_external_scanner_destroy(void *payload) {
free(payload);
}
void tree_sitter_external_tokens_external_scanner_reset(void *payload) {}
bool tree_sitter_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) { return true; }
void tree_sitter_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {}
bool tree_sitter_external_tokens_external_scanner_scan(
void *payload, TSLexer *lexer, const bool *whitelist) {
Scanner *scanner = payload;
@ -103,16 +112,3 @@ bool tree_sitter_external_scanner_example_external_scanner_scan(
return false;
}
void tree_sitter_external_scanner_example_external_scanner_reset(void *payload) {
}
bool tree_sitter_external_scanner_example_external_scanner_serialize(void *payload, TSExternalTokenState state) {
return true;
}
void tree_sitter_external_scanner_example_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
}
void tree_sitter_external_scanner_example_external_scanner_destroy(void *payload) {
free(payload);
}

View file

@ -0,0 +1,15 @@
Unresolved conflict for symbol sequence:
identifier • '{' …
Possible interpretations:
1: (expression identifier) • '{' …
2: (function_call identifier • block)
Possible resolutions:
1: Specify a higher precedence in `function_call` than in the other rules.
2: Specify a higher precedence in `expression` than in the other rules.
3: Specify a left or right associativity in `expression`
4: Add a conflict for these rules: `expression` `function_call`

View file

@ -0,0 +1,63 @@
{
"name": "precedence_on_single_child_missing",
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "function_call"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"function_call": {
"type": "PREC_RIGHT",
"value": 0,
"content": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "block"}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "block"}
]
}
]
}
},
"block": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": "{"},
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "}"}
]
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
}

View file

@ -0,0 +1,14 @@
This language has function calls similar to Ruby's, with no parentheses required, and optional blocks.
There is a shift/reduce conflict here:
```
foo bar { baz }
^
```
The possible actions are:
1. `reduce(expression, 1)` - `bar` is an expression being passed to the `foo` function.
2. `shift` - `bar` is a function being called with the block `{ baz }`
The grammars `precedence_on_single_child_negative` and `precedence_on_single_child_positive` show possible resolutions to this conflict.

View file

@ -0,0 +1,12 @@
===========================
function calls with blocks
===========================
foo bar { baz }
---
(expression (function_call
(identifier)
(expression (identifier))
(block (expression (identifier)))))

View file

@ -0,0 +1,63 @@
{
"name": "precedence_on_single_child_negative",
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "function_call"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"function_call": {
"type": "PREC_RIGHT",
"value": -1,
"content": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "block"}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "block"}
]
}
]
}
},
"block": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": "{"},
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "}"}
]
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
}

View file

@ -0,0 +1 @@
This grammar resolves the conflict shown in the `precedence_on_single_child_missing` grammar by giving `function_call` a negative precedence. This causes reducing the `bar` variable to an expression to be preferred over shifting the `{` token as part of `function_call`.

View file

@ -0,0 +1,13 @@
===========================
function calls with blocks
===========================
foo bar { baz }
---
(expression (function_call
(identifier)
(expression (function_call
(identifier)
(block (expression (identifier)))))))

View file

@ -0,0 +1,63 @@
{
"name": "precedence_on_single_child_positive",
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "function_call"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"function_call": {
"type": "PREC_RIGHT",
"value": 1,
"content": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "block"}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "block"}
]
}
]
}
},
"block": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": "{"},
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "}"}
]
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
}

View file

@ -0,0 +1 @@
This grammar resolves the conflict shown in the `precedence_on_single_child_missing` grammar by giving `function_call` a positive precedence. This causes shifting the `{` token as part of `function_call` to be preferred over reducing the `bar` variable to an expression.

View file

@ -0,0 +1,24 @@
==========================================
curly brace blocks with high precedence
==========================================
a b {}
---
(expression (function_call
(identifier)
(expression (function_call (identifier) (block)))))
==========================================
do blocks with low precedence
==========================================
a b do end
---
(expression (function_call
(identifier)
(expression (identifier))
(do_block)))

View file

@ -0,0 +1,135 @@
{
"name": "precedence_on_subsequence",
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"expression": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "function_call"},
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "scope_resolution"}
]
}
},
"function_call": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "PREC",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "block"}
]
}
},
{
"type": "PREC",
"value": -1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "do_block"}
]
}
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{
"type": "PREC",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "block"}
]
}
}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{
"type": "PREC",
"value": -1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "do_block"}
]
}
}
]
}
]
},
"scope_resolution": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "::"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "SEQ",
"members": [
{"type": "STRING", "value": "::"},
{"type": "SYMBOL", "name": "expression"}
]
}
]
}
},
"block": {
"type": "STRING",
"value": "{}"
},
"do_block": {
"type": "STRING",
"value": "do end"
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
}

3
spec/fixtures/test_grammars/readme.md vendored Normal file
View file

@ -0,0 +1,3 @@
These small grammars demonstrate specific features or test for certain specific regressions.
For some of them, compilation is expected to fail with a given error message. For others, the resulting parser is expected to produce certain trees.

View file

@ -0,0 +1,13 @@
==================================
the readme example
==================================
a + b * c
---
(expression (sum
(expression (variable))
(expression (product
(expression (variable))
(expression (variable))))))

View file

@ -0,0 +1,67 @@
{
"name": "readme_grammar",
// Things that can appear anywhere in the language, like comments
// and whitespace, are expressed as 'extras'.
"extras": [
{"type": "PATTERN", "value": "\\s"},
{"type": "SYMBOL", "name": "comment"}
],
"rules": {
// The first rule listed in the grammar becomes the 'start rule'.
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "sum"},
{"type": "SYMBOL", "name": "product"},
{"type": "SYMBOL", "name": "number"},
{"type": "SYMBOL", "name": "variable"},
{
"type": "SEQ",
"members": [
{"type": "STRING", "value": "("},
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": ")"}
]
}
]
},
// Tokens like '+' and '*' are described directly within the
// grammar's rules, as opposed to in a separate lexer description.
"sum": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
// Ambiguities can be resolved at compile time by assigning precedence
// values to rule subtrees.
"product": {
"type": "PREC_LEFT",
"value": 2,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "*"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
// Tokens can be specified using ECMAScript regexps.
"number": {"type": "PATTERN", "value": "\\d+"},
"comment": {"type": "PATTERN", "value": "#.*"},
"variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
}
}

View file

@ -0,0 +1,7 @@
========================
the empty string
=======================
---
(first_rule)

View file

@ -0,0 +1,6 @@
{
"name": "start_rule_is_blank",
"rules": {
"first_rule": {"type": "BLANK"}
}
}

View file

@ -0,0 +1,6 @@
===========================
the single token
==========================
the-value
---
(first_rule)

View file

@ -0,0 +1,6 @@
{
"name": "start_rule_is_token",
"rules": {
"first_rule": {"type": "STRING", "value": "the-value"}
}
}

View file

@ -0,0 +1,61 @@
#include "helpers/file_helpers.h"
#include <sys/stat.h>
#include <errno.h>
#include <fstream>
#include <dirent.h>
using std::string;
using std::ifstream;
using std::istreambuf_iterator;
using std::ofstream;
using std::vector;
// Returns true when `path` names an existing file or directory, i.e. when
// stat() succeeds on it.
bool file_exists(const std::string &path) {
  struct stat file_info;
  int status = stat(path.c_str(), &file_info);
  return status == 0;
}
// Returns the last-modification time of `path` in seconds since the epoch,
// or 0 if the file does not exist or cannot be stat()ed.
// NOTE: `st_mtime` is a time_t; the result is truncated to `int` to match
// the existing interface in file_helpers.h.
int get_modified_time(const std::string &path) {
  struct stat file_stat;
  if (stat(path.c_str(), &file_stat) != 0) {
    // A missing file is an expected case for callers (e.g. an output that
    // has not been generated yet); only report unexpected stat() failures.
    // Fix: removed the stray unary `+` before path.c_str().
    if (errno != ENOENT)
      fprintf(stderr, "Error in stat() for path: %s\n", path.c_str());
    return 0;
  }
  return file_stat.st_mtime;
}
// Reads and returns the entire contents of the file at `path`. A file that
// cannot be opened yields an empty string. The stream is closed by its
// destructor when it goes out of scope.
std::string read_file(const std::string &path) {
  std::ifstream stream(path);
  return std::string(std::istreambuf_iterator<char>(stream),
                     std::istreambuf_iterator<char>());
}
// Replaces the contents of the file at `path` with `content`, creating the
// file if needed. The stream's destructor flushes and closes it.
void write_file(const std::string &path, const std::string &content) {
  std::ofstream stream(path);
  stream << content;
}
// Returns the names of the entries in the directory at `path`, excluding the
// "." and ".." entries. If the directory cannot be opened, prints a test
// error and returns an empty vector.
std::vector<std::string> list_directory(const std::string &path) {
  std::vector<std::string> entries;
  DIR *directory = opendir(path.c_str());
  if (directory == nullptr) {
    printf("\nTest error - no such directory '%s'", path.c_str());
    return entries;
  }
  for (struct dirent *entry = readdir(directory); entry != nullptr;
       entry = readdir(directory)) {
    std::string name(entry->d_name);
    if (name == "." || name == "..") continue;
    entries.push_back(name);
  }
  closedir(directory);
  return entries;
}

View file

@ -0,0 +1,14 @@
// Small filesystem helpers shared by the specs.
#ifndef HELPERS_FILE_HELPERS_H_
#define HELPERS_FILE_HELPERS_H_
#include <string>
#include <vector>
#include <sys/stat.h>
// Returns true if `path` can be stat()ed (the file or directory exists).
bool file_exists(const std::string &path);
// Returns the mtime of `path` in epoch seconds, or 0 if stat() fails.
int get_modified_time(const std::string &path);
// Returns the entire contents of the file at `path`.
std::string read_file(const std::string &path);
// Replaces the contents of the file at `path` with `content`.
void write_file(const std::string &path, const std::string &content);
// Returns the entry names in directory `path`, excluding "." and "..".
std::vector<std::string> list_directory(const std::string &path);
#endif // HELPERS_FILE_HELPERS_H_

View file

@ -1,12 +1,12 @@
#include "spec_helper.h"
#include "helpers/load_language.h"
#include "helpers/file_helpers.h"
#include <unistd.h>
#include <dlfcn.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <map>
#include <string>
#include <sys/stat.h>
#include <fstream>
#include <stdlib.h>
#include "tree_sitter/compiler.h"
@ -54,25 +54,10 @@ static std::string run_command(const char *cmd, const char *args[]) {
}
}
static bool file_exists(const string &path) {
struct stat file_stat;
return stat(path.c_str(), &file_stat) == 0;
}
static int get_modified_time(const string &path) {
struct stat file_stat;
if (stat(path.c_str(), &file_stat) != 0) {
if (errno != ENOENT)
fprintf(stderr, "Error in stat() for path: %s\n", + path.c_str());
return 0;
}
return file_stat.st_mtime;
}
const TSLanguage *load_language(const string &source_filename,
const string &lib_filename,
const string &language_name,
string external_scanner_filename = "") {
static const TSLanguage *load_language(const string &source_filename,
const string &lib_filename,
const string &language_name,
string external_scanner_filename = "") {
string language_function_name = "tree_sitter_" + language_name;
string header_dir = getenv("PWD") + string("/include");
int source_mtime = get_modified_time(source_filename);
@ -132,9 +117,9 @@ const TSLanguage *load_language(const string &source_filename,
return reinterpret_cast<TSLanguage *(*)()>(language_function)();
}
const TSLanguage *load_compile_result(const string &name,
const TSCompileResult &compile_result,
string external_scanner_path) {
const TSLanguage *load_test_language(const string &name,
const TSCompileResult &compile_result,
string external_scanner_path) {
if (compile_result.error_type != TSCompileErrorTypeNone) {
Assert::Failure(string("Compilation failed ") + compile_result.error_message);
return nullptr;
@ -155,7 +140,7 @@ const TSLanguage *load_compile_result(const string &name,
return language;
}
const TSLanguage *get_test_language(const string &language_name) {
const TSLanguage *load_real_language(const string &language_name) {
if (loaded_languages[language_name])
return loaded_languages[language_name];
@ -182,20 +167,14 @@ const TSLanguage *get_test_language(const string &language_name) {
if (parser_mtime < grammar_mtime || parser_mtime < libcompiler_mtime) {
printf("\n" "Regenerating the %s parser...\n", language_name.c_str());
ifstream grammar_file(grammar_filename);
istreambuf_iterator<char> grammar_file_iterator(grammar_file), end_iterator;
string grammar_json(grammar_file_iterator, end_iterator);
grammar_file.close();
string grammar_json = read_file(grammar_filename);
TSCompileResult result = ts_compile_grammar(grammar_json.c_str());
if (result.error_type != TSCompileErrorTypeNone) {
fprintf(stderr, "Failed to compile %s grammar: %s\n", language_name.c_str(), result.error_message);
return nullptr;
}
ofstream parser_file(parser_filename);
parser_file << result.code;
parser_file.close();
write_file(parser_filename, result.code);
}
mkdir("out/tmp", 0777);

View file

@ -5,8 +5,10 @@
#include "tree_sitter/runtime.h"
#include <string>
const TSLanguage *load_compile_result(const std::string &, const TSCompileResult &,
std::string external_scanner_path = "");
const TSLanguage *get_test_language(const std::string &language_name);
const TSLanguage *load_real_language(const std::string &name);
const TSLanguage *load_test_language(const std::string &name,
const TSCompileResult &compile_result,
std::string external_scanner_path = "");
#endif // HELPERS_LOAD_LANGUAGE_H_

View file

@ -1,20 +1,18 @@
#include "helpers/read_test_entries.h"
#include <assert.h>
#include <string>
#include <fstream>
#include <streambuf>
#include <dirent.h>
#include <regex>
#include "helpers/file_helpers.h"
using std::regex;
using std::regex_search;
using std::regex_replace;
using std::smatch;
using std::regex_constants::extended;
using std::smatch;
using std::string;
using std::vector;
using std::ifstream;
using std::istreambuf_iterator;
string fixtures_dir = "spec/fixtures/";
static string trim_output(const string &input) {
string result(input);
@ -27,7 +25,7 @@ static string trim_output(const string &input) {
static vector<TestEntry> parse_test_entries(string content) {
regex header_pattern("===+\n" "([^=]+)\n" "===+\n", extended);
regex separator_pattern("---+\n", extended);
regex separator_pattern("---+\r?\n", extended);
vector<string> descriptions;
vector<string> bodies;
@ -55,51 +53,42 @@ static vector<TestEntry> parse_test_entries(string content) {
body.substr(0, matches.position() - 1),
trim_output(body.substr(matches.position() + matches[0].length()))
});
} else {
puts(("Invalid corpus entry with description: " + descriptions[i]).c_str());
abort();
}
}
return result;
}
static vector<string> list_directory(string dir_name) {
vector<string> result;
DIR *dir = opendir(dir_name.c_str());
if (!dir) {
printf("\nTest error - no such directory '%s'", dir_name.c_str());
return result;
}
struct dirent *dir_entry;
while ((dir_entry = readdir(dir))) {
string name(dir_entry->d_name);
if (name != "." && name != "..")
result.push_back(dir_name + "/" + name);
}
closedir(dir);
return result;
}
static string read_file(string filename) {
ifstream file(filename);
string result((istreambuf_iterator<char>(file)), istreambuf_iterator<char>());
return result;
}
vector<TestEntry> read_corpus_entries(string language_name) {
vector<TestEntry> read_real_language_corpus(string language_name) {
vector<TestEntry> result;
string fixtures_dir = "spec/fixtures/";
string test_directory = fixtures_dir + "grammars/" + language_name + "/grammar_test";
for (string &test_filename : list_directory(test_directory))
for (TestEntry &entry : parse_test_entries(read_file(test_filename)))
for (string &test_filename : list_directory(test_directory)) {
for (TestEntry &entry : parse_test_entries(read_file(test_directory + "/" + test_filename))) {
result.push_back(entry);
}
}
string error_test_filename = fixtures_dir + "/error_corpus/" + language_name + "_errors.txt";
for (TestEntry &entry : parse_test_entries(read_file(error_test_filename)))
for (TestEntry &entry : parse_test_entries(read_file(error_test_filename))) {
result.push_back(entry);
}
return result;
}
vector<TestEntry> read_test_language_corpus(string language_name) {
vector<TestEntry> result;
string test_directory = fixtures_dir + "test_grammars/" + language_name;
for (string &test_filename : list_directory(test_directory)) {
for (TestEntry &entry : parse_test_entries(read_file(test_directory + "/" + test_filename))) {
result.push_back(entry);
}
}
return result;
}

View file

@ -10,6 +10,7 @@ struct TestEntry {
std::string tree_string;
};
std::vector<TestEntry> read_corpus_entries(std::string directory);
std::vector<TestEntry> read_real_language_corpus(std::string name);
std::vector<TestEntry> read_test_language_corpus(std::string name);
#endif

View file

@ -1,847 +0,0 @@
#include "spec_helper.h"
#include "runtime/alloc.h"
#include "helpers/load_language.h"
#include "helpers/stderr_logger.h"
#include "helpers/dedent.h"
#include "compiler/util/string_helpers.h"
#include <map>
// Returns a copy of `input` in which every "{{key}}" placeholder has
// been replaced by the corresponding value from `parameters`.
static string fill_template(string input, map<string, string> parameters) {
  // `input` is a by-value copy, so it can be edited in place and
  // returned directly.
  for (map<string, string>::const_iterator entry = parameters.begin();
       entry != parameters.end(); ++entry) {
    util::str_replace(&input, "{{" + entry->first + "}}", entry->second);
  }
  return input;
}
START_TEST
describe("compile_grammar", []() {
TSDocument *document;
before_each([&]() {
document = ts_document_new();
});
after_each([&]() {
ts_document_free(document);
});
auto assert_root_node = [&](const string &expected_string) {
TSNode root_node = ts_document_root_node(document);
char *node_string = ts_node_string(root_node, document);
AssertThat(node_string, Equals(expected_string));
ts_free(node_string);
};
describe("conflicts", [&]() {
it("can resolve shift/reduce conflicts using associativities", [&]() {
string grammar_template = R"JSON({
"name": "associativity_example",
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "math_operation"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"math_operation": {
"type": "{{math_operation_prec_type}}",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
})JSON";
// Ambiguity, which '+' applies first?
ts_document_set_input_string(document, "x+y+z");
TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
{"math_operation_prec_type", "PREC"}
}).c_str());
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
Unresolved conflict for symbol sequence:
expression '+' expression '+'
Possible interpretations:
1: (math_operation expression '+' expression) '+'
2: expression '+' (math_operation expression '+' expression)
Possible resolutions:
1: Specify a left or right associativity in `math_operation`
2: Add a conflict for these rules: `math_operation`
)MESSAGE")));
result = ts_compile_grammar(fill_template(grammar_template, {
{"math_operation_prec_type", "PREC_LEFT"}
}).c_str());
ts_document_set_language(document, load_compile_result("associativity_example", result));
ts_document_parse(document);
assert_root_node("(expression (math_operation "
"(expression (math_operation (expression (identifier)) (expression (identifier)))) "
"(expression (identifier))))");
result = ts_compile_grammar(fill_template(grammar_template, {
{"math_operation_prec_type", "PREC_RIGHT"}
}).c_str());
ts_document_set_language(document, load_compile_result("associativity_example", result));
ts_document_parse(document);
assert_root_node("(expression (math_operation "
"(expression (identifier)) "
"(expression (math_operation (expression (identifier)) (expression (identifier))))))");
});
it("can resolve shift/reduce conflicts involving single-child rules using precedence", [&]() {
string grammar_template = R"JSON({
"name": "associativity_example",
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "function_call"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"function_call": {
"type": "PREC_RIGHT",
"value": {{function_call_precedence}},
"content": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "block"}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "block"}
]
}
]
}
},
"block": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": "{"},
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "}"}
]
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
})JSON";
// Ambiguity: is the trailing block associated with `bar` or `foo`?
ts_document_set_input_string(document, "foo bar { baz }");
TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
{"function_call_precedence", "0"}
}).c_str());
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
Unresolved conflict for symbol sequence:
identifier '{'
Possible interpretations:
1: (expression identifier) '{'
2: (function_call identifier block)
Possible resolutions:
1: Specify a higher precedence in `function_call` than in the other rules.
2: Specify a higher precedence in `expression` than in the other rules.
3: Specify a left or right associativity in `expression`
4: Add a conflict for these rules: `expression` `function_call`
)MESSAGE")));
// Giving function calls lower precedence than expressions causes `bar`
// to be treated as an expression passed to `foo`, not as a function
// that's being called with a block.
result = ts_compile_grammar(fill_template(grammar_template, {
{"function_call_precedence", "-1"}
}).c_str());
AssertThat(result.error_message, IsNull());
ts_document_set_language(document, load_compile_result("associativity_example", result));
ts_document_parse(document);
assert_root_node("(expression (function_call "
"(identifier) "
"(expression (identifier)) "
"(block (expression (identifier)))))");
// Giving function calls higher precedence than expressions causes `bar`
// to be treated as a function that's being called with a block, not as
// an expression passed to `foo`.
result = ts_compile_grammar(fill_template(grammar_template, {
{"function_call_precedence", "1"}
}).c_str());
AssertThat(result.error_message, IsNull());
ts_document_set_language(document, load_compile_result("associativity_example", result));
ts_document_set_input_string(document, "foo bar { baz }");
ts_document_parse(document);
assert_root_node("(expression (function_call "
"(identifier) "
"(expression (function_call "
"(identifier) "
"(block (expression (identifier)))))))");
});
it("handles precedence applied to specific rule subsequences (regression)", [&]() {
TSCompileResult result = ts_compile_grammar(R"JSON({
"name": "precedence_on_subsequence",
"extras": [
{"type": "STRING", "value": " "}
],
"rules": {
"expression": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "function_call"},
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "scope_resolution"}
]
}
},
"function_call": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "PREC",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "block"}
]
}
},
{
"type": "PREC",
"value": -1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{"type": "SYMBOL", "name": "do_block"}
]
}
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{
"type": "PREC",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "block"}
]
}
}
]
},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "identifier"},
{
"type": "PREC",
"value": -1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "do_block"}
]
}
}
]
}
]
},
"scope_resolution": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "::"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "SEQ",
"members": [
{"type": "STRING", "value": "::"},
{"type": "SYMBOL", "name": "expression"},
]
}
]
}
},
"block": {
"type": "STRING",
"value": "{}"
},
"do_block": {
"type": "STRING",
"value": "do end"
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
})JSON");
auto language = load_compile_result("precedence_on_subsequence", result);
ts_document_set_language(document, language);
ts_document_set_input_string(document, "a b {}");
ts_document_parse(document);
assert_root_node("(expression (function_call "
"(identifier) "
"(expression (function_call (identifier) (block)))))");
ts_document_set_input_string(document, "a b do end");
ts_document_parse(document);
assert_root_node("(expression (function_call "
"(identifier) "
"(expression (identifier)) "
"(do_block)))");
});
it("does not allow conflicting precedences", [&]() {
string grammar_template = R"JSON({
"name": "conflicting_precedence_example",
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "sum"},
{"type": "SYMBOL", "name": "product"},
{"type": "SYMBOL", "name": "other_thing"}
]
},
"sum": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"product": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "*"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"other_thing": {
"type": "PREC_LEFT",
"value": -1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "*"},
{"type": "STRING", "value": "*"}
]
}
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
})JSON";
TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
}).c_str());
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
Unresolved conflict for symbol sequence:
expression '+' expression '*'
Possible interpretations:
1: (sum expression '+' expression) '*'
2: expression '+' (product expression '*' expression)
3: expression '+' (other_thing expression '*' '*')
Possible resolutions:
1: Specify a higher precedence in `product` and `other_thing` than in the other rules.
2: Specify a higher precedence in `sum` than in the other rules.
3: Add a conflict for these rules: `sum` `product` `other_thing`
)MESSAGE")));
});
});
describe("when the grammar contains rules that match the empty string", [&]() {
it("reports an error", [&]() {
TSCompileResult result = ts_compile_grammar(R"JSON(
{
"name": "empty_rules",
"rules": {
"rule_1": {"type": "SYMBOL", "name": "rule_2"},
"rule_2": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "rule_1"},
{"type": "BLANK"}
]
}
}
}
)JSON");
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
The rule `rule_2` matches the empty string.
Tree-sitter currently does not support syntactic rules that match the empty string.
)MESSAGE")));
});
});
describe("external scanners", [&]() {
it("can tokenize using arbitrary user-defined scanner functions", [&]() {
string grammar = R"JSON({
"name": "external_scanner_example",
"externals": [
"_percent_string",
"_percent_string_start",
"_percent_string_end"
],
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "string"},
{"type": "SYMBOL", "name": "sum"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"sum": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"string": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "_percent_string"},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "_percent_string_start"},
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "_percent_string_end"}
]
},
]
},
"identifier": {
"type": "PATTERN",
"value": "\\a+"
}
}
})JSON";
TSCompileResult result = ts_compile_grammar(grammar.c_str());
AssertThat(result.error_message, IsNull());
ts_document_set_language(document, load_compile_result(
"external_scanner_example",
result,
"spec/fixtures/external_scanners/percent_strings.c"
));
ts_document_set_input_string(document, "x + %(sup (external) scanner?)");
ts_document_parse(document);
assert_root_node("(expression (sum (expression (identifier)) (expression (string))))");
ts_document_set_input_string(document, "%{sup {} #{x + y} {} scanner?}");
ts_document_parse(document);
assert_root_node("(expression (string (expression (sum (expression (identifier)) (expression (identifier))))))");
});
it("allows external scanners to refer to tokens that are defined internally", [&]() {
string grammar = R"JSON({
"name": "shared_external_tokens",
"externals": [
"string",
"line_break"
],
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"statement": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "_expression"},
{"type": "SYMBOL", "name": "_expression"},
{"type": "SYMBOL", "name": "line_break"}
]
},
"_expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "string"},
{"type": "SYMBOL", "name": "variable"},
{"type": "SYMBOL", "name": "number"}
]
},
"variable": {"type": "PATTERN", "value": "\\a+"},
"number": {"type": "PATTERN", "value": "\\d+"},
"line_break": {"type": "STRING", "value": "\n"}
}
})JSON";
TSCompileResult result = ts_compile_grammar(grammar.c_str());
AssertThat(result.error_message, IsNull());
ts_document_set_language(document, load_compile_result(
"shared_external_tokens",
result,
"spec/fixtures/external_scanners/shared_external_tokens.c"
));
ts_document_set_input_string(document, "a b\n");
ts_document_parse(document);
assert_root_node("(statement (variable) (variable) (line_break))");
ts_document_set_input_string(document, "a \nb\n");
ts_document_parse(document);
assert_root_node("(statement (variable) (variable) (line_break))");
ts_document_set_input_string(document, "'hello' 'world'\n");
ts_document_parse(document);
assert_root_node("(statement (string) (string) (line_break))");
ts_document_set_input_string(document, "'hello' \n'world'\n");
ts_document_parse(document);
assert_root_node("(statement (string) (string) (line_break))");
});
it("allows external tokens to be used as extras", [&]() {
string grammar = R"JSON({
"name": "extra_external_tokens",
"externals": [
"comment"
],
"extras": [
{"type": "PATTERN", "value": "\\s"},
{"type": "SYMBOL", "name": "comment"}
],
"rules": {
"assignment": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "variable"},
{"type": "STRING", "value": "="},
{"type": "SYMBOL", "name": "variable"}
]
},
"variable": {"type": "PATTERN", "value": "\\a+"}
}
})JSON";
TSCompileResult result = ts_compile_grammar(grammar.c_str());
AssertThat(result.error_message, IsNull());
ts_document_set_language(document, load_compile_result(
"extra_external_tokens",
result,
"spec/fixtures/external_scanners/extra_external_tokens.c"
));
ts_document_set_input_string(document, "x = # a comment\n y");
ts_document_parse(document);
assert_root_node("(assignment (variable) (comment) (variable))");
});
});
describe("when the grammar's start symbol is a token", [&]() {
it("parses the token", [&]() {
TSCompileResult result = ts_compile_grammar(R"JSON(
{
"name": "one_token_language",
"rules": {
"first_rule": {"type": "STRING", "value": "the-value"}
}
}
)JSON");
ts_document_set_language(document, load_compile_result("one_token_language", result));
ts_document_set_input_string(document, "the-value");
ts_document_parse(document);
assert_root_node("(first_rule)");
});
});
describe("when the grammar's start symbol is blank", [&]() {
it("parses the empty string", [&]() {
TSCompileResult result = ts_compile_grammar(R"JSON(
{
"name": "blank_language",
"rules": {
"first_rule": {"type": "BLANK"}
}
}
)JSON");
ts_document_set_language(document, load_compile_result("blank_language", result));
ts_document_set_input_string(document, "");
ts_document_parse(document);
assert_root_node("(first_rule)");
});
});
describe("when the grammar contains anonymous tokens with escaped characters", [&]() {
it("escapes the escaped characters properly in the generated parser", [&]() {
TSCompileResult result = ts_compile_grammar(R"JSON(
{
"name": "escaped_char_language",
"rules": {
"first_rule": {
"type": "CHOICE",
"members": [
{"type": "STRING", "value": "\n"},
{"type": "STRING", "value": "\r"},
{"type": "STRING", "value": "'hello'"},
{"type": "PATTERN", "value": "\\d+"}
]
}
}
}
)JSON");
ts_document_set_language(document, load_compile_result("escaped_char_language", result));
ts_document_set_input_string(document, "1234");
ts_document_parse(document);
assert_root_node("(first_rule)");
ts_document_set_input_string(document, "\n");
ts_document_parse(document);
assert_root_node("(first_rule)");
ts_document_set_input_string(document, "'hello'");
ts_document_parse(document);
assert_root_node("(first_rule)");
});
});
describe("the grammar in the README", [&]() {
it("parses the input in the README", [&]() {
TSCompileResult result = ts_compile_grammar(R"JSON(
{
"name": "arithmetic",
// Things that can appear anywhere in the language, like comments
// and whitespace, are expressed as 'extras'.
"extras": [
{"type": "PATTERN", "value": "\\s"},
{"type": "SYMBOL", "name": "comment"}
],
"rules": {
// The first rule listed in the grammar becomes the 'start rule'.
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "sum"},
{"type": "SYMBOL", "name": "product"},
{"type": "SYMBOL", "name": "number"},
{"type": "SYMBOL", "name": "variable"},
{
"type": "SEQ",
"members": [
{"type": "STRING", "value": "("},
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": ")"}
]
}
]
},
// Tokens like '+' and '*' are described directly within the
// grammar's rules, as opposed to in a seperate lexer description.
"sum": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
// Ambiguities can be resolved at compile time by assigning precedence
// values to rule subtrees.
"product": {
"type": "PREC_LEFT",
"value": 2,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "*"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
// Tokens can be specified using ECMAScript regexps.
"number": {"type": "PATTERN", "value": "\\d+"},
"comment": {"type": "PATTERN", "value": "#.*"},
"variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
}
}
)JSON");
const TSLanguage *language = load_compile_result("arithmetic", result);
ts_document_set_language(document, language);
ts_document_set_input_string(document, "a + b * c");
ts_document_parse(document);
assert_root_node(
"(expression (sum "
"(expression (variable)) "
"(expression (product "
"(expression (variable)) "
"(expression (variable))))))");
});
});
});
END_TEST

View file

@ -1,185 +0,0 @@
#include "spec_helper.h"
#include "runtime/alloc.h"
#include "helpers/load_language.h"
#include "helpers/read_test_entries.h"
#include "helpers/spy_input.h"
#include "helpers/stderr_logger.h"
#include "helpers/point_helpers.h"
#include "helpers/encoding_helpers.h"
#include "helpers/record_alloc.h"
#include "helpers/random_helpers.h"
#include "helpers/scope_sequence.h"
#include <set>
// Asserts that the document's current syntax tree, rendered through
// ts_node_string, is exactly equal to the expected `tree_string`.
static void assert_correct_tree_shape(const TSDocument *document, string tree_string) {
TSNode root_node = ts_document_root_node(document);
const char *node_string = ts_node_string(root_node, document);
// Copy into a std::string so the heap buffer returned by
// ts_node_string can be released before the assertion runs.
string result(node_string);
ts_free((void *)node_string);
AssertThat(result, Equals(tree_string));
}
// Recursively verifies structural invariants of a syntax node:
// - its start byte/point never exceed its end byte/point,
// - children appear in order and never overlap,
// - the node spans at least as far as its last child,
// - if any child has changes, the parent reports changes too.
static void assert_consistent_sizes(TSNode node) {
size_t child_count = ts_node_child_count(node);
size_t start_byte = ts_node_start_byte(node);
size_t end_byte = ts_node_end_byte(node);
TSPoint start_point = ts_node_start_point(node);
TSPoint end_point = ts_node_end_point(node);
bool some_child_has_changes = false;
AssertThat(start_byte, !IsGreaterThan(end_byte));
AssertThat(start_point, !IsGreaterThan(end_point));
// Track where the previous child ended so each child can be checked
// against its predecessor rather than only against the parent.
size_t last_child_end_byte = start_byte;
TSPoint last_child_end_point = start_point;
for (size_t i = 0; i < child_count; i++) {
TSNode child = ts_node_child(node, i);
size_t child_start_byte = ts_node_start_byte(child);
TSPoint child_start_point = ts_node_start_point(child);
AssertThat(child_start_byte, !IsLessThan(last_child_end_byte));
AssertThat(child_start_point, !IsLessThan(last_child_end_point));
// Recurse: every descendant must satisfy the same invariants.
assert_consistent_sizes(child);
if (ts_node_has_changes(child))
some_child_has_changes = true;
last_child_end_byte = ts_node_end_byte(child);
last_child_end_point = ts_node_end_point(child);
}
if (child_count > 0) {
AssertThat(end_byte, !IsLessThan(last_child_end_byte));
AssertThat(end_point, !IsLessThan(last_child_end_point));
}
// Change flags must propagate upward from any edited descendant.
if (some_child_has_changes) {
AssertThat(ts_node_has_changes(node), IsTrue());
}
}
// Asserts that the root node of the document's tree spans the whole
// input `content`, then checks size invariants over the entire tree.
static void assert_correct_tree_size(TSDocument *document, string content) {
TSNode root_node = ts_document_root_node(document);
size_t expected_size = content.size();
// In the JSON grammar, the start rule (`_value`) is hidden, so the node
// returned from `ts_document_root_node` (e.g. an `object` node), does not
// actually point to the root of the tree. In this weird case, trailing
// whitespace is not included in the root node's size.
//
// TODO: Fix this inconsistency. Maybe disallow the start rule being hidden?
if (ts_document_language(document) == get_test_language("json") &&
string(ts_node_type(root_node, document)) != "ERROR")
// Trim trailing newlines/spaces: expected size ends at the last
// non-whitespace character of the input.
expected_size = content.find_last_not_of("\n ") + 1;
AssertThat(ts_node_end_byte(root_node), Equals(expected_size));
assert_consistent_sizes(root_node);
}
START_TEST
describe("The Corpus", []() {
vector<string> test_languages({
"javascript",
"json",
"c",
"cpp",
"python",
});
for (auto &language_name : test_languages) {
describe(("the " + language_name + " language").c_str(), [&]() {
TSDocument *document;
before_each([&]() {
record_alloc::start();
document = ts_document_new();
ts_document_set_language(document, get_test_language(language_name));
// ts_document_set_logger(document, stderr_logger_new(true));
// ts_document_print_debugging_graphs(document, true);
});
after_each([&]() {
ts_document_free(document);
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
for (auto &entry : read_corpus_entries(language_name)) {
SpyInput *input;
auto it_handles_edit_sequence = [&](string name, std::function<void()> edit_sequence){
it(("parses " + entry.description + ": " + name).c_str(), [&]() {
input = new SpyInput(entry.input, 3);
ts_document_set_input(document, input->input());
edit_sequence();
assert_correct_tree_shape(document, entry.tree_string);
assert_correct_tree_size(document, input->content);
delete input;
});
};
it_handles_edit_sequence("initial parse", [&]() {
ts_document_parse(document);
});
std::set<std::pair<size_t, size_t>> deletions;
std::set<std::pair<size_t, string>> insertions;
for (size_t i = 0; i < 60; i++) {
size_t edit_position = random() % utf8_char_count(entry.input);
size_t deletion_size = random() % (utf8_char_count(entry.input) - edit_position);
string inserted_text = random_words(random() % 4 + 1);
if (insertions.insert({edit_position, inserted_text}).second) {
string description = "\"" + inserted_text + "\" at " + to_string(edit_position);
it_handles_edit_sequence("repairing an insertion of " + description, [&]() {
ts_document_edit(document, input->replace(edit_position, 0, inserted_text));
ts_document_parse(document);
assert_correct_tree_size(document, input->content);
ts_document_edit(document, input->undo());
assert_correct_tree_size(document, input->content);
TSRange *ranges;
uint32_t range_count;
ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
verify_changed_ranges(old_scope_sequence, new_scope_sequence,
input->content, ranges, range_count);
ts_free(ranges);
});
}
if (deletions.insert({edit_position, deletion_size}).second) {
string desription = to_string(edit_position) + "-" + to_string(edit_position + deletion_size);
it_handles_edit_sequence("repairing a deletion of " + desription, [&]() {
ts_document_edit(document, input->replace(edit_position, deletion_size, ""));
ts_document_parse(document);
assert_correct_tree_size(document, input->content);
ts_document_edit(document, input->undo());
assert_correct_tree_size(document, input->content);
TSRange *ranges;
uint32_t range_count;
ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
verify_changed_ranges(old_scope_sequence, new_scope_sequence,
input->content, ranges, range_count);
ts_free(ranges);
});
}
}
}
});
}
});
END_TEST

View file

@ -0,0 +1,181 @@
#include "spec_helper.h"
#include "runtime/alloc.h"
#include "helpers/load_language.h"
#include "helpers/read_test_entries.h"
#include "helpers/spy_input.h"
#include "helpers/stderr_logger.h"
#include "helpers/point_helpers.h"
#include "helpers/encoding_helpers.h"
#include "helpers/record_alloc.h"
#include "helpers/random_helpers.h"
#include "helpers/scope_sequence.h"
#include <set>
// Recursively verifies structural invariants of a syntax node:
// - its start byte/point never exceed its end byte/point,
// - children appear in order and never overlap,
// - the node spans at least as far as its last child,
// - if any child has changes, the parent reports changes too.
static void assert_consistent_sizes(TSNode node) {
size_t child_count = ts_node_child_count(node);
size_t start_byte = ts_node_start_byte(node);
size_t end_byte = ts_node_end_byte(node);
TSPoint start_point = ts_node_start_point(node);
TSPoint end_point = ts_node_end_point(node);
bool some_child_has_changes = false;
AssertThat(start_byte, !IsGreaterThan(end_byte));
AssertThat(start_point, !IsGreaterThan(end_point));
// Track where the previous child ended so each child is checked
// against its predecessor, not only against the parent.
size_t last_child_end_byte = start_byte;
TSPoint last_child_end_point = start_point;
for (size_t i = 0; i < child_count; i++) {
TSNode child = ts_node_child(node, i);
size_t child_start_byte = ts_node_start_byte(child);
TSPoint child_start_point = ts_node_start_point(child);
AssertThat(child_start_byte, !IsLessThan(last_child_end_byte));
AssertThat(child_start_point, !IsLessThan(last_child_end_point));
// Recurse: every descendant must satisfy the same invariants.
assert_consistent_sizes(child);
if (ts_node_has_changes(child))
some_child_has_changes = true;
last_child_end_byte = ts_node_end_byte(child);
last_child_end_point = ts_node_end_point(child);
}
if (child_count > 0) {
AssertThat(end_byte, !IsLessThan(last_child_end_byte));
AssertThat(end_point, !IsLessThan(last_child_end_point));
}
// Change flags must propagate upward from any edited descendant.
if (some_child_has_changes) {
AssertThat(ts_node_has_changes(node), IsTrue());
}
}
// Asserts that the root node of the document's tree spans the whole
// input `content`, then checks size invariants over the entire tree.
// (This copy loads the JSON language via load_real_language, matching
// the helper naming used by the restructured corpus specs.)
static void assert_correct_tree_size(TSDocument *document, string content) {
TSNode root_node = ts_document_root_node(document);
size_t expected_size = content.size();
// In the JSON grammar, the start rule (`_value`) is hidden, so the node
// returned from `ts_document_root_node` (e.g. an `object` node), does not
// actually point to the root of the tree. In this weird case, trailing
// whitespace is not included in the root node's size.
//
// TODO: Fix this inconsistency. Maybe disallow the start rule being hidden?
if (ts_document_language(document) == load_real_language("json") &&
string(ts_node_type(root_node, document)) != "ERROR")
// Trim trailing newlines/spaces: expected size ends at the last
// non-whitespace character of the input.
expected_size = content.find_last_not_of("\n ") + 1;
AssertThat(ts_node_end_byte(root_node), Equals(expected_size));
assert_consistent_sizes(root_node);
}
START_TEST
vector<string> test_languages({
"javascript",
"json",
"c",
"cpp",
"python",
});
for (auto &language_name : test_languages) {
describe(("the " + language_name + " language").c_str(), [&]() {
TSDocument *document;
before_each([&]() {
record_alloc::start();
document = ts_document_new();
ts_document_set_language(document, load_real_language(language_name));
// ts_document_set_logger(document, stderr_logger_new(true));
// ts_document_print_debugging_graphs(document, true);
});
after_each([&]() {
ts_document_free(document);
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
for (auto &entry : read_real_language_corpus(language_name)) {
SpyInput *input;
auto it_handles_edit_sequence = [&](string name, std::function<void()> edit_sequence){
it(("parses " + entry.description + ": " + name).c_str(), [&]() {
input = new SpyInput(entry.input, 3);
ts_document_set_input(document, input->input());
edit_sequence();
TSNode root_node = ts_document_root_node(document);
const char *node_string = ts_node_string(root_node, document);
string result(node_string);
ts_free((void *)node_string);
AssertThat(result, Equals(entry.tree_string));
assert_correct_tree_size(document, input->content);
delete input;
});
};
it_handles_edit_sequence("initial parse", [&]() {
ts_document_parse(document);
});
std::set<std::pair<size_t, size_t>> deletions;
std::set<std::pair<size_t, string>> insertions;
for (size_t i = 0; i < 60; i++) {
size_t edit_position = random() % utf8_char_count(entry.input);
size_t deletion_size = random() % (utf8_char_count(entry.input) - edit_position);
string inserted_text = random_words(random() % 4 + 1);
if (insertions.insert({edit_position, inserted_text}).second) {
string description = "\"" + inserted_text + "\" at " + to_string(edit_position);
it_handles_edit_sequence("repairing an insertion of " + description, [&]() {
ts_document_edit(document, input->replace(edit_position, 0, inserted_text));
ts_document_parse(document);
assert_correct_tree_size(document, input->content);
ts_document_edit(document, input->undo());
assert_correct_tree_size(document, input->content);
TSRange *ranges;
uint32_t range_count;
ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
verify_changed_ranges(old_scope_sequence, new_scope_sequence,
input->content, ranges, range_count);
ts_free(ranges);
});
}
if (deletions.insert({edit_position, deletion_size}).second) {
string desription = to_string(edit_position) + "-" + to_string(edit_position + deletion_size);
it_handles_edit_sequence("repairing a deletion of " + desription, [&]() {
ts_document_edit(document, input->replace(edit_position, deletion_size, ""));
ts_document_parse(document);
assert_correct_tree_size(document, input->content);
ts_document_edit(document, input->undo());
assert_correct_tree_size(document, input->content);
TSRange *ranges;
uint32_t range_count;
ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
verify_changed_ranges(old_scope_sequence, new_scope_sequence,
input->content, ranges, range_count);
ts_free(ranges);
});
}
}
}
});
}
END_TEST

View file

@ -0,0 +1,78 @@
#include "spec_helper.h"
#include "helpers/read_test_entries.h"
#include "helpers/load_language.h"
#include "helpers/stderr_logger.h"
#include "helpers/file_helpers.h"
#include "runtime/alloc.h"
// Integration test over every grammar fixture in spec/fixtures/test_grammars.
// Each fixture directory contains a grammar.json, an optional scanner.c
// (external scanner), an optional expected_error.txt (for grammars that are
// expected to fail compilation), and a corpus.txt of parse examples
// (read via read_test_language_corpus).
START_TEST
string grammars_dir_path = "spec/fixtures/test_grammars";
vector<string> test_languages = list_directory(grammars_dir_path);
for (auto &language_name : test_languages) {
// Skip the documentation file that sits alongside the fixture directories.
if (language_name == "readme.md") continue;
describe(("test language: " + language_name).c_str(), [&]() {
string directory_path = grammars_dir_path + "/" + language_name;
string grammar_path = directory_path + "/grammar.json";
string external_scanner_path = directory_path + "/scanner.c";
string expected_error_path = directory_path + "/expected_error.txt";
string corpus_path = directory_path + "/corpus.txt";
// An empty path signals "no external scanner" to load_test_language below.
if (!file_exists(external_scanner_path)) {
external_scanner_path = "";
}
// Compile the grammar eagerly, at spec-registration time, so both the
// error-message branch and the parsing branch can inspect the result.
string grammar_json = read_file(grammar_path);
TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str());
if (file_exists(expected_error_path)) {
// Fixture is a negative test: compilation must fail with exactly the
// message stored in expected_error.txt.
it("fails with the correct error message", [&]() {
string expected_error = read_file(expected_error_path);
AssertThat((void *)compile_result.error_message, !IsNull());
AssertThat(compile_result.error_message, Equals(expected_error));
});
// No corpus tests for a grammar that is expected not to compile.
return;
} else {
TSDocument *document = nullptr;
const TSLanguage *language = nullptr;
before_each([&]() {
// Load (and cache) the compiled language once; later `it` blocks in
// this describe reuse it.
if (!language) {
language = load_test_language(
language_name,
compile_result,
external_scanner_path
);
}
// A fresh document per example keeps the parses independent.
document = ts_document_new();
ts_document_set_language(document, language);
// Debugging aids, left disabled by default:
// ts_document_set_logger(document, stderr_logger_new(true));
// ts_document_print_debugging_graphs(document, true);
});
after_each([&]() {
if (document) ts_document_free(document);
});
// One `it` per corpus entry: parse the input and compare the
// S-expression form of the tree against the expected tree string.
for (auto &entry : read_test_language_corpus(language_name)) {
it(("parses " + entry.description).c_str(), [&]() {
ts_document_set_input_string_with_length(document, entry.input.c_str(), entry.input.size());
ts_document_parse(document);
TSNode root_node = ts_document_root_node(document);
// ts_node_string allocates; copy into a std::string and free the
// C buffer before asserting.
const char *node_string = ts_node_string(root_node, document);
string result(node_string);
ts_free((void *)node_string);
AssertThat(result, Equals(entry.tree_string));
});
}
}
});
}
END_TEST

View file

@ -43,7 +43,7 @@ describe("Document", [&]() {
before_each([&]() {
spy_input = new SpyInput("{\"key\": [null, 2]}", 3);
ts_document_set_language(document, get_test_language("json"));
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, "{\"key\": [1, 2]}");
ts_document_parse(document);
@ -152,7 +152,7 @@ describe("Document", [&]() {
});
it("uses the given language for future parses", [&]() {
ts_document_set_language(document, get_test_language("json"));
ts_document_set_language(document, load_real_language("json"));
ts_document_parse(document);
root = ts_document_root_node(document);
@ -162,10 +162,10 @@ describe("Document", [&]() {
});
it("clears out any previous tree", [&]() {
ts_document_set_language(document, get_test_language("json"));
ts_document_set_language(document, load_real_language("json"));
ts_document_parse(document);
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
AssertThat(ts_document_root_node(document).data, Equals<void *>(nullptr));
ts_document_parse(document);
@ -177,7 +177,7 @@ describe("Document", [&]() {
});
it("does not allow setting a language with a different version number", [&]() {
TSLanguage language = *get_test_language("json");
TSLanguage language = *load_real_language("json");
AssertThat(ts_language_version(&language), Equals<uint32_t>(TREE_SITTER_LANGUAGE_VERSION));
language.version++;
@ -193,7 +193,7 @@ describe("Document", [&]() {
before_each([&]() {
logger = new SpyLogger();
ts_document_set_language(document, get_test_language("json"));
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, "[1, 2]");
});
@ -235,7 +235,7 @@ describe("Document", [&]() {
SpyInput *input;
before_each([&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
input = new SpyInput("{a: null};", 3);
ts_document_set_input(document, input->input());
ts_document_parse(document);

View file

@ -40,7 +40,7 @@ describe("Node", []() {
record_alloc::start();
document = ts_document_new();
ts_document_set_language(document, get_test_language("json"));
ts_document_set_language(document, load_real_language("json"));
ts_document_set_input_string(document, input_string.c_str());
ts_document_parse(document);

View file

@ -83,7 +83,7 @@ describe("Parser", [&]() {
describe("handling errors", [&]() {
describe("when there is an invalid substring right before a valid token", [&]() {
it("computes the error node's size and position correctly", [&]() {
ts_document_set_language(document, get_test_language("json"));
ts_document_set_language(document, load_real_language("json"));
set_text(" [123, @@@@@, true]");
assert_root_node(
@ -108,7 +108,7 @@ describe("Parser", [&]() {
describe("when there is an unexpected string in the middle of a token", [&]() {
it("computes the error node's size and position correctly", [&]() {
ts_document_set_language(document, get_test_language("json"));
ts_document_set_language(document, load_real_language("json"));
set_text(" [123, faaaaalse, true]");
assert_root_node(
@ -134,7 +134,7 @@ describe("Parser", [&]() {
describe("when there is one unexpected token between two valid tokens", [&]() {
it("computes the error node's size and position correctly", [&]() {
ts_document_set_language(document, get_test_language("json"));
ts_document_set_language(document, load_real_language("json"));
set_text(" [123, true false, true]");
assert_root_node(
@ -153,7 +153,7 @@ describe("Parser", [&]() {
describe("when there is an unexpected string at the end of a token", [&]() {
it("computes the error's size and position correctly", [&]() {
ts_document_set_language(document, get_test_language("json"));
ts_document_set_language(document, load_real_language("json"));
set_text(" [123, \"hi\n, true]");
assert_root_node(
@ -163,7 +163,7 @@ describe("Parser", [&]() {
describe("when there is an unterminated error", [&]() {
it("maintains a consistent tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("a; /* b");
assert_root_node(
"(ERROR (program (expression_statement (identifier))) (UNEXPECTED EOF))");
@ -172,7 +172,7 @@ describe("Parser", [&]() {
describe("when there are extra tokens at the end of the viable prefix", [&]() {
it("does not include them in the error node", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text(
"var x;\n"
"\n"
@ -192,7 +192,7 @@ describe("Parser", [&]() {
describe("handling extra tokens", [&]() {
describe("when the token appears as part of a grammar rule", [&]() {
it("incorporates it into the tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("fn()\n");
assert_root_node(
@ -202,7 +202,7 @@ describe("Parser", [&]() {
describe("when the token appears somewhere else", [&]() {
it("incorporates it into the tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text(
"fn()\n"
" .otherFn();");
@ -218,7 +218,7 @@ describe("Parser", [&]() {
describe("when several extra tokens appear in a row", [&]() {
it("incorporates them into the tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text(
"fn()\n\n"
"// This is a comment"
@ -239,7 +239,7 @@ describe("Parser", [&]() {
describe("editing", [&]() {
describe("creating new tokens near the end of the input", [&]() {
it("updates the parse tree and re-reads only the changed portion of the text", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("x * (100 + abc);");
assert_root_node(
@ -262,7 +262,7 @@ describe("Parser", [&]() {
it("updates the parse tree and re-reads only the changed portion of the input", [&]() {
chunk_size = 2;
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("123 + 456 * (10 + x);");
assert_root_node(
@ -285,7 +285,7 @@ describe("Parser", [&]() {
describe("introducing an error", [&]() {
it("gives the error the right size", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("var x = y;");
assert_root_node(
@ -308,7 +308,7 @@ describe("Parser", [&]() {
describe("into the middle of an existing token", [&]() {
it("updates the parse tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("abc * 123;");
assert_root_node(
@ -327,7 +327,7 @@ describe("Parser", [&]() {
describe("at the end of an existing token", [&]() {
it("updates the parse tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("abc * 123;");
assert_root_node(
@ -346,7 +346,7 @@ describe("Parser", [&]() {
describe("inserting text into a node containing a extra token", [&]() {
it("updates the parse tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("123 *\n"
"// a-comment\n"
"abc;");
@ -373,7 +373,7 @@ describe("Parser", [&]() {
describe("when a critical token is removed", [&]() {
it("updates the parse tree, creating an error", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("123 * 456; 789 * 123;");
assert_root_node(
@ -392,7 +392,7 @@ describe("Parser", [&]() {
describe("with external tokens", [&]() {
it("maintains the external scanner's state during incremental parsing", [&]() {
ts_document_set_language(document, get_test_language("python"));
ts_document_set_language(document, load_real_language("python"));
string text = dedent(R"PYTHON(
if a:
print b
@ -420,7 +420,7 @@ describe("Parser", [&]() {
});
it("does not try to re-use nodes that are within the edited region", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("{ x: (b.c) };");
assert_root_node(
@ -435,7 +435,7 @@ describe("Parser", [&]() {
});
it("updates the document's parse count", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
AssertThat(ts_document_parse_count(document), Equals<size_t>(0));
set_text("{ x: (b.c) };");
@ -449,7 +449,7 @@ describe("Parser", [&]() {
describe("lexing", [&]() {
describe("handling tokens containing wildcard patterns (e.g. comments)", [&]() {
it("terminates them at the end of the document", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("x; // this is a comment");
assert_root_node(
@ -464,7 +464,7 @@ describe("Parser", [&]() {
it("recognizes UTF8 characters as single characters", [&]() {
// 'ΩΩΩ — ΔΔ';
ts_document_set_language(document, get_test_language("javascript"));
ts_document_set_language(document, load_real_language("javascript"));
set_text("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';");
assert_root_node(