847 lines
26 KiB
C++
847 lines
26 KiB
C++
#include "spec_helper.h"
|
|
#include "runtime/alloc.h"
|
|
#include "helpers/load_language.h"
|
|
#include "helpers/stderr_logger.h"
|
|
#include "helpers/dedent.h"
|
|
#include "compiler/util/string_helpers.h"
|
|
#include <map>
|
|
|
|
// Expands a grammar template: for every (name, value) entry in `parameters`,
// the placeholder text `{{name}}` in `input` is handed to util::str_replace
// together with `value`, and the resulting text is returned.
//
// Entries are visited in the map's iteration order. Placeholders whose name
// has no entry in `parameters` are not touched by this function.
//
// `input` is taken by value on purpose: the substitutions are applied directly
// to that copy, so no second working copy of the template is needed. The
// parameter map is only read, so it is taken by const reference.
static string fill_template(string input, const map<string, string> &parameters) {
  for (const auto &parameter : parameters) {
    util::str_replace(&input, "{{" + parameter.first + "}}", parameter.second);
  }
  return input;
}
|
|
|
|
START_TEST
|
|
|
|
describe("compile_grammar", []() {
|
|
TSDocument *document;
|
|
|
|
before_each([&]() {
|
|
document = ts_document_new();
|
|
});
|
|
|
|
after_each([&]() {
|
|
ts_document_free(document);
|
|
});
|
|
|
|
// Asserts that the string produced by ts_node_string for the document's
// current root node equals `expected_string`.
auto assert_root_node = [&](const string &expected_string) {
  TSNode root_node = ts_document_root_node(document);
  char *node_string = ts_node_string(root_node, document);

  // Copy the text and release the C buffer *before* asserting. A failing
  // AssertThat leaves this lambda early (the assertion library reports
  // failures by throwing), which previously skipped ts_free and leaked the
  // buffer on every failed expectation.
  const string actual_string(node_string);
  ts_free(node_string);

  AssertThat(actual_string, Equals(expected_string));
};
|
|
|
|
describe("conflicts", [&]() {
|
|
it("can resolve shift/reduce conflicts using associativities", [&]() {
|
|
string grammar_template = R"JSON({
|
|
"name": "associativity_example",
|
|
|
|
"rules": {
|
|
"expression": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "math_operation"},
|
|
{"type": "SYMBOL", "name": "identifier"}
|
|
]
|
|
},
|
|
|
|
"math_operation": {
|
|
"type": "{{math_operation_prec_type}}",
|
|
"value": 0,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": "+"},
|
|
{"type": "SYMBOL", "name": "expression"}
|
|
]
|
|
}
|
|
},
|
|
|
|
"identifier": {
|
|
"type": "PATTERN",
|
|
"value": "[a-zA-Z]+"
|
|
}
|
|
}
|
|
})JSON";
|
|
|
|
// Ambiguity: which '+' applies first?
|
|
ts_document_set_input_string(document, "x+y+z");
|
|
|
|
TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
|
|
{"math_operation_prec_type", "PREC"}
|
|
}).c_str());
|
|
|
|
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
|
|
Unresolved conflict for symbol sequence:
|
|
|
|
expression '+' expression • '+' …
|
|
|
|
Possible interpretations:
|
|
|
|
1: (math_operation expression '+' expression) • '+' …
|
|
2: expression '+' (math_operation expression • '+' expression)
|
|
|
|
Possible resolutions:
|
|
|
|
1: Specify a left or right associativity in `math_operation`
|
|
2: Add a conflict for these rules: `math_operation`
|
|
)MESSAGE")));
|
|
|
|
result = ts_compile_grammar(fill_template(grammar_template, {
|
|
{"math_operation_prec_type", "PREC_LEFT"}
|
|
}).c_str());
|
|
|
|
ts_document_set_language(document, load_compile_result("associativity_example", result));
|
|
ts_document_parse(document);
|
|
assert_root_node("(expression (math_operation "
|
|
"(expression (math_operation (expression (identifier)) (expression (identifier)))) "
|
|
"(expression (identifier))))");
|
|
|
|
result = ts_compile_grammar(fill_template(grammar_template, {
|
|
{"math_operation_prec_type", "PREC_RIGHT"}
|
|
}).c_str());
|
|
|
|
ts_document_set_language(document, load_compile_result("associativity_example", result));
|
|
ts_document_parse(document);
|
|
assert_root_node("(expression (math_operation "
|
|
"(expression (identifier)) "
|
|
"(expression (math_operation (expression (identifier)) (expression (identifier))))))");
|
|
});
|
|
|
|
it("can resolve shift/reduce conflicts involving single-child rules using precedence", [&]() {
|
|
string grammar_template = R"JSON({
|
|
"name": "associativity_example",
|
|
|
|
"extras": [
|
|
{"type": "PATTERN", "value": "\\s"}
|
|
],
|
|
|
|
"rules": {
|
|
"expression": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "function_call"},
|
|
{"type": "SYMBOL", "name": "identifier"}
|
|
]
|
|
},
|
|
|
|
"function_call": {
|
|
"type": "PREC_RIGHT",
|
|
"value": {{function_call_precedence}},
|
|
"content": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "identifier"},
|
|
{"type": "SYMBOL", "name": "expression"}
|
|
]
|
|
},
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "identifier"},
|
|
{"type": "SYMBOL", "name": "block"}
|
|
]
|
|
},
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "identifier"},
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "SYMBOL", "name": "block"}
|
|
]
|
|
}
|
|
]
|
|
}
|
|
},
|
|
|
|
"block": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "STRING", "value": "{"},
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": "}"}
|
|
]
|
|
},
|
|
|
|
"identifier": {
|
|
"type": "PATTERN",
|
|
"value": "[a-zA-Z]+"
|
|
}
|
|
}
|
|
})JSON";
|
|
|
|
// Ambiguity: is the trailing block associated with `bar` or `foo`?
|
|
ts_document_set_input_string(document, "foo bar { baz }");
|
|
|
|
TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
|
|
{"function_call_precedence", "0"}
|
|
}).c_str());
|
|
|
|
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
|
|
Unresolved conflict for symbol sequence:
|
|
|
|
identifier • '{' …
|
|
|
|
Possible interpretations:
|
|
|
|
1: (expression identifier) • '{' …
|
|
2: (function_call identifier • block)
|
|
|
|
Possible resolutions:
|
|
|
|
1: Specify a higher precedence in `function_call` than in the other rules.
|
|
2: Specify a higher precedence in `expression` than in the other rules.
|
|
3: Specify a left or right associativity in `expression`
|
|
4: Add a conflict for these rules: `expression` `function_call`
|
|
)MESSAGE")));
|
|
|
|
// Giving function calls lower precedence than expressions causes `bar`
|
|
// to be treated as an expression passed to `foo`, not as a function
|
|
// that's being called with a block.
|
|
result = ts_compile_grammar(fill_template(grammar_template, {
|
|
{"function_call_precedence", "-1"}
|
|
}).c_str());
|
|
|
|
AssertThat(result.error_message, IsNull());
|
|
ts_document_set_language(document, load_compile_result("associativity_example", result));
|
|
ts_document_parse(document);
|
|
assert_root_node("(expression (function_call "
|
|
"(identifier) "
|
|
"(expression (identifier)) "
|
|
"(block (expression (identifier)))))");
|
|
|
|
// Giving function calls higher precedence than expressions causes `bar`
|
|
// to be treated as a function that's being called with a block, not as
|
|
// an expression passed to `foo`.
|
|
result = ts_compile_grammar(fill_template(grammar_template, {
|
|
{"function_call_precedence", "1"}
|
|
}).c_str());
|
|
|
|
AssertThat(result.error_message, IsNull());
|
|
ts_document_set_language(document, load_compile_result("associativity_example", result));
|
|
ts_document_set_input_string(document, "foo bar { baz }");
|
|
ts_document_parse(document);
|
|
assert_root_node("(expression (function_call "
|
|
"(identifier) "
|
|
"(expression (function_call "
|
|
"(identifier) "
|
|
"(block (expression (identifier)))))))");
|
|
});
|
|
|
|
it("handles precedence applied to specific rule subsequences (regression)", [&]() {
|
|
TSCompileResult result = ts_compile_grammar(R"JSON({
|
|
"name": "precedence_on_subsequence",
|
|
|
|
"extras": [
|
|
{"type": "STRING", "value": " "}
|
|
],
|
|
|
|
"rules": {
|
|
"expression": {
|
|
"type": "PREC_LEFT",
|
|
"value": 0,
|
|
"content": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "function_call"},
|
|
{"type": "SYMBOL", "name": "identifier"},
|
|
{"type": "SYMBOL", "name": "scope_resolution"}
|
|
]
|
|
}
|
|
},
|
|
|
|
"function_call": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "identifier"},
|
|
{"type": "SYMBOL", "name": "expression"}
|
|
]
|
|
},
|
|
|
|
{
|
|
"type": "PREC",
|
|
"value": 1,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "identifier"},
|
|
{"type": "SYMBOL", "name": "block"}
|
|
]
|
|
}
|
|
},
|
|
|
|
{
|
|
"type": "PREC",
|
|
"value": -1,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "identifier"},
|
|
{"type": "SYMBOL", "name": "do_block"}
|
|
]
|
|
}
|
|
},
|
|
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "identifier"},
|
|
{
|
|
"type": "PREC",
|
|
"value": 1,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "SYMBOL", "name": "block"}
|
|
]
|
|
}
|
|
}
|
|
]
|
|
},
|
|
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "identifier"},
|
|
{
|
|
"type": "PREC",
|
|
"value": -1,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "SYMBOL", "name": "do_block"}
|
|
]
|
|
}
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
|
|
"scope_resolution": {
|
|
"type": "PREC_LEFT",
|
|
"value": 1,
|
|
"content": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": "::"},
|
|
{"type": "SYMBOL", "name": "expression"}
|
|
]
|
|
},
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "STRING", "value": "::"},
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
]
|
|
}
|
|
]
|
|
}
|
|
},
|
|
|
|
"block": {
|
|
"type": "STRING",
|
|
"value": "{}"
|
|
},
|
|
|
|
"do_block": {
|
|
"type": "STRING",
|
|
"value": "do end"
|
|
},
|
|
|
|
"identifier": {
|
|
"type": "PATTERN",
|
|
"value": "[a-zA-Z]+"
|
|
}
|
|
}
|
|
})JSON");
|
|
|
|
auto language = load_compile_result("precedence_on_subsequence", result);
|
|
ts_document_set_language(document, language);
|
|
|
|
ts_document_set_input_string(document, "a b {}");
|
|
ts_document_parse(document);
|
|
assert_root_node("(expression (function_call "
|
|
"(identifier) "
|
|
"(expression (function_call (identifier) (block)))))");
|
|
|
|
ts_document_set_input_string(document, "a b do end");
|
|
ts_document_parse(document);
|
|
assert_root_node("(expression (function_call "
|
|
"(identifier) "
|
|
"(expression (identifier)) "
|
|
"(do_block)))");
|
|
});
|
|
|
|
it("does not allow conflicting precedences", [&]() {
|
|
string grammar_template = R"JSON({
|
|
"name": "conflicting_precedence_example",
|
|
|
|
"rules": {
|
|
"expression": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "sum"},
|
|
{"type": "SYMBOL", "name": "product"},
|
|
{"type": "SYMBOL", "name": "other_thing"}
|
|
]
|
|
},
|
|
|
|
"sum": {
|
|
"type": "PREC_LEFT",
|
|
"value": 0,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": "+"},
|
|
{"type": "SYMBOL", "name": "expression"}
|
|
]
|
|
}
|
|
},
|
|
|
|
"product": {
|
|
"type": "PREC_LEFT",
|
|
"value": 1,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": "*"},
|
|
{"type": "SYMBOL", "name": "expression"}
|
|
]
|
|
}
|
|
},
|
|
|
|
"other_thing": {
|
|
"type": "PREC_LEFT",
|
|
"value": -1,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": "*"},
|
|
{"type": "STRING", "value": "*"}
|
|
]
|
|
}
|
|
},
|
|
|
|
"identifier": {
|
|
"type": "PATTERN",
|
|
"value": "[a-zA-Z]+"
|
|
}
|
|
}
|
|
})JSON";
|
|
|
|
// This grammar has no placeholders and no parameters were being supplied, so
// it is compiled directly instead of going through fill_template.
TSCompileResult result = ts_compile_grammar(grammar_template.c_str());
|
|
|
|
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
|
|
Unresolved conflict for symbol sequence:
|
|
|
|
expression '+' expression • '*' …
|
|
|
|
Possible interpretations:
|
|
|
|
1: (sum expression '+' expression) • '*' …
|
|
2: expression '+' (product expression • '*' expression)
|
|
3: expression '+' (other_thing expression • '*' '*')
|
|
|
|
Possible resolutions:
|
|
|
|
1: Specify a higher precedence in `product` and `other_thing` than in the other rules.
|
|
2: Specify a higher precedence in `sum` than in the other rules.
|
|
3: Add a conflict for these rules: `sum` `product` `other_thing`
|
|
)MESSAGE")));
|
|
});
|
|
});
|
|
|
|
describe("when the grammar contains rules that match the empty string", [&]() {
|
|
it("reports an error", [&]() {
|
|
TSCompileResult result = ts_compile_grammar(R"JSON(
|
|
{
|
|
"name": "empty_rules",
|
|
|
|
"rules": {
|
|
"rule_1": {"type": "SYMBOL", "name": "rule_2"},
|
|
|
|
"rule_2": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "rule_1"},
|
|
{"type": "BLANK"}
|
|
]
|
|
}
|
|
}
|
|
}
|
|
)JSON");
|
|
|
|
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
|
|
The rule `rule_2` matches the empty string.
|
|
Tree-sitter currently does not support syntactic rules that match the empty string.
|
|
)MESSAGE")));
|
|
});
|
|
});
|
|
|
|
describe("external scanners", [&]() {
|
|
it("can tokenize using arbitrary user-defined scanner functions", [&]() {
|
|
string grammar = R"JSON({
|
|
"name": "external_scanner_example",
|
|
|
|
"externals": [
|
|
"_percent_string",
|
|
"_percent_string_start",
|
|
"_percent_string_end"
|
|
],
|
|
|
|
"extras": [
|
|
{"type": "PATTERN", "value": "\\s"}
|
|
],
|
|
|
|
"rules": {
|
|
"expression": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "string"},
|
|
{"type": "SYMBOL", "name": "sum"},
|
|
{"type": "SYMBOL", "name": "identifier"}
|
|
]
|
|
},
|
|
|
|
"sum": {
|
|
"type": "PREC_LEFT",
|
|
"value": 0,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": "+"},
|
|
{"type": "SYMBOL", "name": "expression"}
|
|
]
|
|
}
|
|
},
|
|
|
|
"string": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "_percent_string"},
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "_percent_string_start"},
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "SYMBOL", "name": "_percent_string_end"}
|
|
]
|
|
},
|
|
]
|
|
},
|
|
|
|
"identifier": {
|
|
"type": "PATTERN",
|
|
"value": "\\a+"
|
|
}
|
|
}
|
|
})JSON";
|
|
|
|
TSCompileResult result = ts_compile_grammar(grammar.c_str());
|
|
AssertThat(result.error_message, IsNull());
|
|
|
|
ts_document_set_language(document, load_compile_result(
|
|
"external_scanner_example",
|
|
result,
|
|
"spec/fixtures/external_scanners/percent_strings.c"
|
|
));
|
|
|
|
ts_document_set_input_string(document, "x + %(sup (external) scanner?)");
|
|
ts_document_parse(document);
|
|
assert_root_node("(expression (sum (expression (identifier)) (expression (string))))");
|
|
|
|
ts_document_set_input_string(document, "%{sup {} #{x + y} {} scanner?}");
|
|
ts_document_parse(document);
|
|
assert_root_node("(expression (string (expression (sum (expression (identifier)) (expression (identifier))))))");
|
|
});
|
|
|
|
it("allows external scanners to refer to tokens that are defined internally", [&]() {
|
|
string grammar = R"JSON({
|
|
"name": "shared_external_tokens",
|
|
|
|
"externals": [
|
|
"string",
|
|
"line_break"
|
|
],
|
|
|
|
"extras": [
|
|
{"type": "PATTERN", "value": "\\s"}
|
|
],
|
|
|
|
"rules": {
|
|
"statement": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "_expression"},
|
|
{"type": "SYMBOL", "name": "_expression"},
|
|
{"type": "SYMBOL", "name": "line_break"}
|
|
]
|
|
},
|
|
|
|
"_expression": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "string"},
|
|
{"type": "SYMBOL", "name": "variable"},
|
|
{"type": "SYMBOL", "name": "number"}
|
|
]
|
|
},
|
|
|
|
"variable": {"type": "PATTERN", "value": "\\a+"},
|
|
"number": {"type": "PATTERN", "value": "\\d+"},
|
|
"line_break": {"type": "STRING", "value": "\n"}
|
|
}
|
|
})JSON";
|
|
|
|
TSCompileResult result = ts_compile_grammar(grammar.c_str());
|
|
AssertThat(result.error_message, IsNull());
|
|
|
|
ts_document_set_language(document, load_compile_result(
|
|
"shared_external_tokens",
|
|
result,
|
|
"spec/fixtures/external_scanners/shared_external_tokens.c"
|
|
));
|
|
|
|
ts_document_set_input_string(document, "a b\n");
|
|
ts_document_parse(document);
|
|
assert_root_node("(statement (variable) (variable) (line_break))");
|
|
|
|
ts_document_set_input_string(document, "a \nb\n");
|
|
ts_document_parse(document);
|
|
assert_root_node("(statement (variable) (variable) (line_break))");
|
|
|
|
ts_document_set_input_string(document, "'hello' 'world'\n");
|
|
ts_document_parse(document);
|
|
assert_root_node("(statement (string) (string) (line_break))");
|
|
|
|
ts_document_set_input_string(document, "'hello' \n'world'\n");
|
|
ts_document_parse(document);
|
|
assert_root_node("(statement (string) (string) (line_break))");
|
|
});
|
|
|
|
it("allows external tokens to be used as extras", [&]() {
|
|
string grammar = R"JSON({
|
|
"name": "extra_external_tokens",
|
|
|
|
"externals": [
|
|
"comment"
|
|
],
|
|
|
|
"extras": [
|
|
{"type": "PATTERN", "value": "\\s"},
|
|
{"type": "SYMBOL", "name": "comment"}
|
|
],
|
|
|
|
"rules": {
|
|
"assignment": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "variable"},
|
|
{"type": "STRING", "value": "="},
|
|
{"type": "SYMBOL", "name": "variable"}
|
|
]
|
|
},
|
|
|
|
"variable": {"type": "PATTERN", "value": "\\a+"}
|
|
}
|
|
})JSON";
|
|
|
|
TSCompileResult result = ts_compile_grammar(grammar.c_str());
|
|
AssertThat(result.error_message, IsNull());
|
|
|
|
ts_document_set_language(document, load_compile_result(
|
|
"extra_external_tokens",
|
|
result,
|
|
"spec/fixtures/external_scanners/extra_external_tokens.c"
|
|
));
|
|
|
|
ts_document_set_input_string(document, "x = # a comment\n y");
|
|
ts_document_parse(document);
|
|
assert_root_node("(assignment (variable) (comment) (variable))");
|
|
});
|
|
});
|
|
|
|
describe("when the grammar's start symbol is a token", [&]() {
|
|
it("parses the token", [&]() {
|
|
TSCompileResult result = ts_compile_grammar(R"JSON(
|
|
{
|
|
"name": "one_token_language",
|
|
"rules": {
|
|
"first_rule": {"type": "STRING", "value": "the-value"}
|
|
}
|
|
}
|
|
)JSON");
|
|
|
|
// The grammar is expected to compile; check that explicitly so a compile
// error is reported as such rather than surfacing while loading the result.
AssertThat(result.error_message, IsNull());
ts_document_set_language(document, load_compile_result("one_token_language", result));

// The whole input is the single token that makes up the start rule.
ts_document_set_input_string(document, "the-value");
ts_document_parse(document);
assert_root_node("(first_rule)");
|
|
});
|
|
});
|
|
|
|
describe("when the grammar's start symbol is blank", [&]() {
|
|
it("parses the empty string", [&]() {
|
|
TSCompileResult result = ts_compile_grammar(R"JSON(
|
|
{
|
|
"name": "blank_language",
|
|
"rules": {
|
|
"first_rule": {"type": "BLANK"}
|
|
}
|
|
}
|
|
)JSON");
|
|
|
|
// The grammar is expected to compile; check that explicitly so a compile
// error is reported as such rather than surfacing while loading the result.
AssertThat(result.error_message, IsNull());
ts_document_set_language(document, load_compile_result("blank_language", result));

// The empty input is expected to produce the (blank) start rule.
ts_document_set_input_string(document, "");
ts_document_parse(document);
assert_root_node("(first_rule)");
|
|
});
|
|
});
|
|
|
|
describe("when the grammar contains anonymous tokens with escaped characters", [&]() {
|
|
it("escapes the escaped characters properly in the generated parser", [&]() {
|
|
TSCompileResult result = ts_compile_grammar(R"JSON(
|
|
{
|
|
"name": "escaped_char_language",
|
|
"rules": {
|
|
"first_rule": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "STRING", "value": "\n"},
|
|
{"type": "STRING", "value": "\r"},
|
|
{"type": "STRING", "value": "'hello'"},
|
|
{"type": "PATTERN", "value": "\\d+"}
|
|
]
|
|
}
|
|
}
|
|
}
|
|
)JSON");
|
|
|
|
// The grammar is expected to compile; check that explicitly so a compile
// error is reported as such rather than surfacing while loading the result.
AssertThat(result.error_message, IsNull());
ts_document_set_language(document, load_compile_result("escaped_char_language", result));

// Digits matched by the pattern member of the choice.
ts_document_set_input_string(document, "1234");
ts_document_parse(document);
assert_root_node("(first_rule)");

// A newline matched by the escaped anonymous token.
ts_document_set_input_string(document, "\n");
ts_document_parse(document);
assert_root_node("(first_rule)");

// A token whose text contains single quotes.
ts_document_set_input_string(document, "'hello'");
ts_document_parse(document);
assert_root_node("(first_rule)");
|
|
});
|
|
});
|
|
|
|
describe("the grammar in the README", [&]() {
|
|
it("parses the input in the README", [&]() {
|
|
TSCompileResult result = ts_compile_grammar(R"JSON(
|
|
{
|
|
"name": "arithmetic",
|
|
|
|
// Things that can appear anywhere in the language, like comments
|
|
// and whitespace, are expressed as 'extras'.
|
|
"extras": [
|
|
{"type": "PATTERN", "value": "\\s"},
|
|
{"type": "SYMBOL", "name": "comment"}
|
|
],
|
|
|
|
"rules": {
|
|
|
|
// The first rule listed in the grammar becomes the 'start rule'.
|
|
"expression": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "sum"},
|
|
{"type": "SYMBOL", "name": "product"},
|
|
{"type": "SYMBOL", "name": "number"},
|
|
{"type": "SYMBOL", "name": "variable"},
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "STRING", "value": "("},
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": ")"}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
|
|
// Tokens like '+' and '*' are described directly within the
|
|
// grammar's rules, as opposed to in a seperate lexer description.
|
|
"sum": {
|
|
"type": "PREC_LEFT",
|
|
"value": 1,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": "+"},
|
|
{"type": "SYMBOL", "name": "expression"}
|
|
]
|
|
}
|
|
},
|
|
|
|
// Ambiguities can be resolved at compile time by assigning precedence
|
|
// values to rule subtrees.
|
|
"product": {
|
|
"type": "PREC_LEFT",
|
|
"value": 2,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": "*"},
|
|
{"type": "SYMBOL", "name": "expression"}
|
|
]
|
|
}
|
|
},
|
|
|
|
// Tokens can be specified using ECMAScript regexps.
|
|
"number": {"type": "PATTERN", "value": "\\d+"},
|
|
"comment": {"type": "PATTERN", "value": "#.*"},
|
|
"variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
|
|
}
|
|
}
|
|
)JSON");
|
|
|
|
const TSLanguage *language = load_compile_result("arithmetic", result);
|
|
|
|
ts_document_set_language(document, language);
|
|
ts_document_set_input_string(document, "a + b * c");
|
|
ts_document_parse(document);
|
|
|
|
assert_root_node(
|
|
"(expression (sum "
|
|
"(expression (variable)) "
|
|
"(expression (product "
|
|
"(expression (variable)) "
|
|
"(expression (variable))))))");
|
|
});
|
|
});
|
|
});
|
|
|
|
END_TEST
|