Tokens that are defined in the grammar's rules may now be included in the externals list also, so that external scanners can check if they are valid lookaheads or not, and if so, can return them to the parser if needed.
816 lines
25 KiB
C++
816 lines
25 KiB
C++
#include "spec_helper.h"
|
|
#include "runtime/alloc.h"
|
|
#include "helpers/load_language.h"
|
|
#include "helpers/stderr_logger.h"
|
|
#include "compiler/util/string_helpers.h"
|
|
#include <map>
|
|
|
|
static string dedent(string input) {
|
|
size_t indent_level = input.find_first_not_of("\n ") - input.find_first_not_of("\n");
|
|
string whitespace = "\n" + string(indent_level, ' ');
|
|
util::str_replace(&input, whitespace, "\n");
|
|
return input.substr(
|
|
input.find_first_not_of("\n "),
|
|
input.find_last_not_of("\n ") + 1
|
|
);
|
|
}
|
|
|
|
static string fill_template(string input, map<string, string> parameters) {
|
|
string result = input;
|
|
for (const auto &pair : parameters) {
|
|
util::str_replace(&result, "{{" + pair.first + "}}", pair.second);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
START_TEST
|
|
|
|
describe("compile_grammar", []() {
|
|
TSDocument *document;
|
|
|
|
before_each([&]() {
|
|
document = ts_document_new();
|
|
});
|
|
|
|
after_each([&]() {
|
|
ts_document_free(document);
|
|
});
|
|
|
|
auto assert_root_node = [&](const string &expected_string) {
|
|
TSNode root_node = ts_document_root_node(document);
|
|
char *node_string = ts_node_string(root_node, document);
|
|
AssertThat(node_string, Equals(expected_string));
|
|
ts_free(node_string);
|
|
};
|
|
|
|
describe("conflicts", [&]() {
|
|
it("can resolve shift/reduce conflicts using associativities", [&]() {
|
|
string grammar_template = R"JSON({
|
|
"name": "associativity_example",
|
|
|
|
"rules": {
|
|
"expression": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "math_operation"},
|
|
{"type": "SYMBOL", "name": "identifier"}
|
|
]
|
|
},
|
|
|
|
"math_operation": {
|
|
"type": "{{math_operation_prec_type}}",
|
|
"value": 0,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": "+"},
|
|
{"type": "SYMBOL", "name": "expression"}
|
|
]
|
|
}
|
|
},
|
|
|
|
"identifier": {
|
|
"type": "PATTERN",
|
|
"value": "[a-zA-Z]+"
|
|
}
|
|
}
|
|
})JSON";
|
|
|
|
// Ambiguity, which '+' applies first?
|
|
ts_document_set_input_string(document, "x+y+z");
|
|
|
|
TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
|
|
{"math_operation_prec_type", "PREC"}
|
|
}).c_str());
|
|
|
|
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
|
|
Unresolved conflict for symbol sequence:
|
|
|
|
expression '+' expression • '+' …
|
|
|
|
Possible interpretations:
|
|
|
|
1: (math_operation expression '+' expression) • '+' …
|
|
2: expression '+' (math_operation expression • '+' expression)
|
|
|
|
Possible resolutions:
|
|
|
|
1: Specify a left or right associativity in `math_operation`
|
|
2: Add a conflict for these rules: `math_operation`
|
|
)MESSAGE")));
|
|
|
|
result = ts_compile_grammar(fill_template(grammar_template, {
|
|
{"math_operation_prec_type", "PREC_LEFT"}
|
|
}).c_str());
|
|
|
|
ts_document_set_language(document, load_compile_result("associativity_example", result));
|
|
ts_document_parse(document);
|
|
assert_root_node("(expression (math_operation "
|
|
"(expression (math_operation (expression (identifier)) (expression (identifier)))) "
|
|
"(expression (identifier))))");
|
|
|
|
result = ts_compile_grammar(fill_template(grammar_template, {
|
|
{"math_operation_prec_type", "PREC_RIGHT"}
|
|
}).c_str());
|
|
|
|
ts_document_set_language(document, load_compile_result("associativity_example", result));
|
|
ts_document_parse(document);
|
|
assert_root_node("(expression (math_operation "
|
|
"(expression (identifier)) "
|
|
"(expression (math_operation (expression (identifier)) (expression (identifier))))))");
|
|
});
|
|
|
|
it("can resolve shift/reduce conflicts involving single-child rules using precedence", [&]() {
|
|
string grammar_template = R"JSON({
|
|
"name": "associativity_example",
|
|
|
|
"extras": [
|
|
{"type": "PATTERN", "value": "\\s"}
|
|
],
|
|
|
|
"rules": {
|
|
"expression": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "function_call"},
|
|
{"type": "SYMBOL", "name": "identifier"}
|
|
]
|
|
},
|
|
|
|
"function_call": {
|
|
"type": "PREC_RIGHT",
|
|
"value": {{function_call_precedence}},
|
|
"content": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "identifier"},
|
|
{"type": "SYMBOL", "name": "expression"}
|
|
]
|
|
},
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "identifier"},
|
|
{"type": "SYMBOL", "name": "block"}
|
|
]
|
|
},
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "identifier"},
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "SYMBOL", "name": "block"}
|
|
]
|
|
}
|
|
]
|
|
}
|
|
},
|
|
|
|
"block": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "STRING", "value": "{"},
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": "}"}
|
|
]
|
|
},
|
|
|
|
"identifier": {
|
|
"type": "PATTERN",
|
|
"value": "[a-zA-Z]+"
|
|
}
|
|
}
|
|
})JSON";
|
|
|
|
// Ambiguity: is the trailing block associated with `bar` or `foo`?
|
|
ts_document_set_input_string(document, "foo bar { baz }");
|
|
|
|
TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
|
|
{"function_call_precedence", "0"}
|
|
}).c_str());
|
|
|
|
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
|
|
Unresolved conflict for symbol sequence:
|
|
|
|
identifier • '{' …
|
|
|
|
Possible interpretations:
|
|
|
|
1: (expression identifier) • '{' …
|
|
2: (function_call identifier • block)
|
|
|
|
Possible resolutions:
|
|
|
|
1: Specify a higher precedence in `function_call` than in the other rules.
|
|
2: Specify a higher precedence in `expression` than in the other rules.
|
|
3: Specify a left or right associativity in `expression`
|
|
4: Add a conflict for these rules: `expression` `function_call`
|
|
)MESSAGE")));
|
|
|
|
// Giving function calls lower precedence than expressions causes `bar`
|
|
// to be treated as an expression passed to `foo`, not as a function
|
|
// that's being called with a block.
|
|
result = ts_compile_grammar(fill_template(grammar_template, {
|
|
{"function_call_precedence", "-1"}
|
|
}).c_str());
|
|
|
|
AssertThat(result.error_message, IsNull());
|
|
ts_document_set_language(document, load_compile_result("associativity_example", result));
|
|
ts_document_parse(document);
|
|
assert_root_node("(expression (function_call "
|
|
"(identifier) "
|
|
"(expression (identifier)) "
|
|
"(block (expression (identifier)))))");
|
|
|
|
// Giving function calls higher precedence than expressions causes `bar`
|
|
// to be treated as a function that's being called with a block, not as
|
|
// an expression passed to `foo`.
|
|
result = ts_compile_grammar(fill_template(grammar_template, {
|
|
{"function_call_precedence", "1"}
|
|
}).c_str());
|
|
|
|
AssertThat(result.error_message, IsNull());
|
|
ts_document_set_language(document, load_compile_result("associativity_example", result));
|
|
ts_document_set_input_string(document, "foo bar { baz }");
|
|
ts_document_parse(document);
|
|
assert_root_node("(expression (function_call "
|
|
"(identifier) "
|
|
"(expression (function_call "
|
|
"(identifier) "
|
|
"(block (expression (identifier)))))))");
|
|
});
|
|
|
|
it("handles precedence applied to specific rule subsequences (regression)", [&]() {
|
|
TSCompileResult result = ts_compile_grammar(R"JSON({
|
|
"name": "precedence_on_subsequence",
|
|
|
|
"extras": [
|
|
{"type": "STRING", "value": " "}
|
|
],
|
|
|
|
"rules": {
|
|
"expression": {
|
|
"type": "PREC_LEFT",
|
|
"value": 0,
|
|
"content": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "function_call"},
|
|
{"type": "SYMBOL", "name": "identifier"},
|
|
{"type": "SYMBOL", "name": "scope_resolution"}
|
|
]
|
|
}
|
|
},
|
|
|
|
"function_call": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "identifier"},
|
|
{"type": "SYMBOL", "name": "expression"}
|
|
]
|
|
},
|
|
|
|
{
|
|
"type": "PREC",
|
|
"value": 1,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "identifier"},
|
|
{"type": "SYMBOL", "name": "block"}
|
|
]
|
|
}
|
|
},
|
|
|
|
{
|
|
"type": "PREC",
|
|
"value": -1,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "identifier"},
|
|
{"type": "SYMBOL", "name": "do_block"}
|
|
]
|
|
}
|
|
},
|
|
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "identifier"},
|
|
{
|
|
"type": "PREC",
|
|
"value": 1,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "SYMBOL", "name": "block"}
|
|
]
|
|
}
|
|
}
|
|
]
|
|
},
|
|
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "identifier"},
|
|
{
|
|
"type": "PREC",
|
|
"value": -1,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "SYMBOL", "name": "do_block"}
|
|
]
|
|
}
|
|
}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
|
|
"scope_resolution": {
|
|
"type": "PREC_LEFT",
|
|
"value": 1,
|
|
"content": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": "::"},
|
|
{"type": "SYMBOL", "name": "expression"}
|
|
]
|
|
},
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "STRING", "value": "::"},
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
]
|
|
}
|
|
]
|
|
}
|
|
},
|
|
|
|
"block": {
|
|
"type": "STRING",
|
|
"value": "{}"
|
|
},
|
|
|
|
"do_block": {
|
|
"type": "STRING",
|
|
"value": "do end"
|
|
},
|
|
|
|
"identifier": {
|
|
"type": "PATTERN",
|
|
"value": "[a-zA-Z]+"
|
|
}
|
|
}
|
|
})JSON");
|
|
|
|
auto language = load_compile_result("precedence_on_subsequence", result);
|
|
ts_document_set_language(document, language);
|
|
|
|
ts_document_set_input_string(document, "a b {}");
|
|
ts_document_parse(document);
|
|
assert_root_node("(expression (function_call "
|
|
"(identifier) "
|
|
"(expression (function_call (identifier) (block)))))");
|
|
|
|
ts_document_set_input_string(document, "a b do end");
|
|
ts_document_parse(document);
|
|
assert_root_node("(expression (function_call "
|
|
"(identifier) "
|
|
"(expression (identifier)) "
|
|
"(do_block)))");
|
|
});
|
|
|
|
it("does not allow conflicting precedences", [&]() {
|
|
string grammar_template = R"JSON({
|
|
"name": "conflicting_precedence_example",
|
|
|
|
"rules": {
|
|
"expression": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "sum"},
|
|
{"type": "SYMBOL", "name": "product"},
|
|
{"type": "SYMBOL", "name": "other_thing"}
|
|
]
|
|
},
|
|
|
|
"sum": {
|
|
"type": "PREC_LEFT",
|
|
"value": 0,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": "+"},
|
|
{"type": "SYMBOL", "name": "expression"}
|
|
]
|
|
}
|
|
},
|
|
|
|
"product": {
|
|
"type": "PREC_LEFT",
|
|
"value": 1,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": "*"},
|
|
{"type": "SYMBOL", "name": "expression"}
|
|
]
|
|
}
|
|
},
|
|
|
|
"other_thing": {
|
|
"type": "PREC_LEFT",
|
|
"value": -1,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": "*"},
|
|
{"type": "STRING", "value": "*"}
|
|
]
|
|
}
|
|
},
|
|
|
|
"identifier": {
|
|
"type": "PATTERN",
|
|
"value": "[a-zA-Z]+"
|
|
}
|
|
}
|
|
})JSON";
|
|
|
|
TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
|
|
}).c_str());
|
|
|
|
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
|
|
Unresolved conflict for symbol sequence:
|
|
|
|
expression '+' expression • '*' …
|
|
|
|
Possible interpretations:
|
|
|
|
1: (sum expression '+' expression) • '*' …
|
|
2: expression '+' (product expression • '*' expression)
|
|
3: expression '+' (other_thing expression • '*' '*')
|
|
|
|
Possible resolutions:
|
|
|
|
1: Specify a higher precedence in `product` and `other_thing` than in the other rules.
|
|
2: Specify a higher precedence in `sum` than in the other rules.
|
|
3: Add a conflict for these rules: `sum` `product` `other_thing`
|
|
)MESSAGE")));
|
|
});
|
|
});
|
|
|
|
describe("when the grammar contains rules that match the empty string", [&]() {
|
|
it("reports an error", [&]() {
|
|
TSCompileResult result = ts_compile_grammar(R"JSON(
|
|
{
|
|
"name": "empty_rules",
|
|
|
|
"rules": {
|
|
"rule_1": {"type": "SYMBOL", "name": "rule_2"},
|
|
|
|
"rule_2": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "rule_1"},
|
|
{"type": "BLANK"}
|
|
]
|
|
}
|
|
}
|
|
}
|
|
)JSON");
|
|
|
|
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
|
|
The rule `rule_2` matches the empty string.
|
|
Tree-sitter currently does not support syntactic rules that match the empty string.
|
|
)MESSAGE")));
|
|
});
|
|
});
|
|
|
|
describe("external scanners", [&]() {
|
|
it("can tokenize using arbitrary user-defined scanner functions", [&]() {
|
|
string grammar = R"JSON({
|
|
"name": "external_scanner_example",
|
|
|
|
"externals": [
|
|
"_percent_string",
|
|
"_percent_string_start",
|
|
"_percent_string_end"
|
|
],
|
|
|
|
"extras": [
|
|
{"type": "PATTERN", "value": "\\s"}
|
|
],
|
|
|
|
"rules": {
|
|
"expression": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "string"},
|
|
{"type": "SYMBOL", "name": "sum"},
|
|
{"type": "SYMBOL", "name": "identifier"}
|
|
]
|
|
},
|
|
|
|
"sum": {
|
|
"type": "PREC_LEFT",
|
|
"value": 0,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": "+"},
|
|
{"type": "SYMBOL", "name": "expression"}
|
|
]
|
|
}
|
|
},
|
|
|
|
"string": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "_percent_string"},
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "_percent_string_start"},
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "SYMBOL", "name": "_percent_string_end"}
|
|
]
|
|
},
|
|
]
|
|
},
|
|
|
|
"identifier": {
|
|
"type": "PATTERN",
|
|
"value": "\\a+"
|
|
}
|
|
}
|
|
})JSON";
|
|
|
|
TSCompileResult result = ts_compile_grammar(grammar.c_str());
|
|
AssertThat(result.error_message, IsNull());
|
|
|
|
ts_document_set_language(document, load_compile_result(
|
|
"external_scanner_example",
|
|
result,
|
|
"spec/fixtures/external_scanners/percent_strings.c"
|
|
));
|
|
|
|
ts_document_set_input_string(document, "x + %(sup (external) scanner?)");
|
|
ts_document_parse(document);
|
|
assert_root_node("(expression (sum (expression (identifier)) (expression (string))))");
|
|
|
|
ts_document_set_input_string(document, "%{sup {} #{x + y} {} scanner?}");
|
|
ts_document_parse(document);
|
|
assert_root_node("(expression (string (expression (sum (expression (identifier)) (expression (identifier))))))");
|
|
});
|
|
|
|
it("allows external scanners to refer to tokens that are defined internally", [&]() {
|
|
string grammar = R"JSON({
|
|
"name": "shared_external_tokens",
|
|
|
|
"externals": [
|
|
"string",
|
|
"line_break"
|
|
],
|
|
|
|
"extras": [
|
|
{"type": "PATTERN", "value": "\\s"}
|
|
],
|
|
|
|
"rules": {
|
|
"statement": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "_expression"},
|
|
{"type": "SYMBOL", "name": "_expression"},
|
|
{"type": "SYMBOL", "name": "line_break"}
|
|
]
|
|
},
|
|
|
|
"_expression": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "string"},
|
|
{"type": "SYMBOL", "name": "variable"},
|
|
{"type": "SYMBOL", "name": "number"}
|
|
]
|
|
},
|
|
|
|
"variable": {"type": "PATTERN", "value": "\\a+"},
|
|
"number": {"type": "PATTERN", "value": "\\d+"},
|
|
"line_break": {"type": "STRING", "value": "\n"}
|
|
}
|
|
})JSON";
|
|
|
|
TSCompileResult result = ts_compile_grammar(grammar.c_str());
|
|
AssertThat(result.error_message, IsNull());
|
|
|
|
ts_document_set_language(document, load_compile_result(
|
|
"shared_external_tokens",
|
|
result,
|
|
"spec/fixtures/external_scanners/line_breaks.c"
|
|
));
|
|
|
|
ts_document_set_input_string(document, "a b\n");
|
|
ts_document_parse(document);
|
|
assert_root_node("(statement (variable) (variable) (line_break))");
|
|
|
|
ts_document_set_input_string(document, "a \nb\n");
|
|
ts_document_parse(document);
|
|
assert_root_node("(statement (variable) (variable) (line_break))");
|
|
|
|
|
|
ts_document_set_input_string(document, "'hello' 'world'\n");
|
|
ts_document_parse(document);
|
|
assert_root_node("(statement (string) (string) (line_break))");
|
|
|
|
ts_document_set_input_string(document, "'hello' \n'world'\n");
|
|
ts_document_parse(document);
|
|
assert_root_node("(statement (string) (string) (line_break))");
|
|
});
|
|
});
|
|
|
|
describe("when the grammar's start symbol is a token", [&]() {
|
|
it("parses the token", [&]() {
|
|
TSCompileResult result = ts_compile_grammar(R"JSON(
|
|
{
|
|
"name": "one_token_language",
|
|
"rules": {
|
|
"first_rule": {"type": "STRING", "value": "the-value"}
|
|
}
|
|
}
|
|
)JSON");
|
|
|
|
ts_document_set_language(document, load_compile_result("one_token_language", result));
|
|
|
|
ts_document_set_input_string(document, "the-value");
|
|
ts_document_parse(document);
|
|
assert_root_node("(first_rule)");
|
|
});
|
|
});
|
|
|
|
describe("when the grammar's start symbol is blank", [&]() {
|
|
it("parses the empty string", [&]() {
|
|
TSCompileResult result = ts_compile_grammar(R"JSON(
|
|
{
|
|
"name": "blank_language",
|
|
"rules": {
|
|
"first_rule": {"type": "BLANK"}
|
|
}
|
|
}
|
|
)JSON");
|
|
|
|
ts_document_set_language(document, load_compile_result("blank_language", result));
|
|
|
|
ts_document_set_input_string(document, "");
|
|
ts_document_parse(document);
|
|
assert_root_node("(first_rule)");
|
|
});
|
|
});
|
|
|
|
describe("when the grammar contains anonymous tokens with escaped characters", [&]() {
|
|
it("escapes the escaped characters properly in the generated parser", [&]() {
|
|
TSCompileResult result = ts_compile_grammar(R"JSON(
|
|
{
|
|
"name": "escaped_char_language",
|
|
"rules": {
|
|
"first_rule": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "STRING", "value": "\n"},
|
|
{"type": "STRING", "value": "\r"},
|
|
{"type": "STRING", "value": "'hello'"},
|
|
{"type": "PATTERN", "value": "\\d+"}
|
|
]
|
|
}
|
|
}
|
|
}
|
|
)JSON");
|
|
|
|
ts_document_set_language(document, load_compile_result("escaped_char_language", result));
|
|
|
|
ts_document_set_input_string(document, "1234");
|
|
ts_document_parse(document);
|
|
assert_root_node("(first_rule)");
|
|
|
|
ts_document_set_input_string(document, "\n");
|
|
ts_document_parse(document);
|
|
assert_root_node("(first_rule)");
|
|
|
|
ts_document_set_input_string(document, "'hello'");
|
|
ts_document_parse(document);
|
|
assert_root_node("(first_rule)");
|
|
});
|
|
});
|
|
|
|
describe("the grammar in the README", [&]() {
|
|
it("parses the input in the README", [&]() {
|
|
TSCompileResult result = ts_compile_grammar(R"JSON(
|
|
{
|
|
"name": "arithmetic",
|
|
|
|
// Things that can appear anywhere in the language, like comments
|
|
// and whitespace, are expressed as 'extras'.
|
|
"extras": [
|
|
{"type": "PATTERN", "value": "\\s"},
|
|
{"type": "SYMBOL", "name": "comment"}
|
|
],
|
|
|
|
"rules": {
|
|
|
|
// The first rule listed in the grammar becomes the 'start rule'.
|
|
"expression": {
|
|
"type": "CHOICE",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "sum"},
|
|
{"type": "SYMBOL", "name": "product"},
|
|
{"type": "SYMBOL", "name": "number"},
|
|
{"type": "SYMBOL", "name": "variable"},
|
|
{
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "STRING", "value": "("},
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": ")"}
|
|
]
|
|
}
|
|
]
|
|
},
|
|
|
|
// Tokens like '+' and '*' are described directly within the
|
|
// grammar's rules, as opposed to in a seperate lexer description.
|
|
"sum": {
|
|
"type": "PREC_LEFT",
|
|
"value": 1,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": "+"},
|
|
{"type": "SYMBOL", "name": "expression"}
|
|
]
|
|
}
|
|
},
|
|
|
|
// Ambiguities can be resolved at compile time by assigning precedence
|
|
// values to rule subtrees.
|
|
"product": {
|
|
"type": "PREC_LEFT",
|
|
"value": 2,
|
|
"content": {
|
|
"type": "SEQ",
|
|
"members": [
|
|
{"type": "SYMBOL", "name": "expression"},
|
|
{"type": "STRING", "value": "*"},
|
|
{"type": "SYMBOL", "name": "expression"}
|
|
]
|
|
}
|
|
},
|
|
|
|
// Tokens can be specified using ECMAScript regexps.
|
|
"number": {"type": "PATTERN", "value": "\\d+"},
|
|
"comment": {"type": "PATTERN", "value": "#.*"},
|
|
"variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
|
|
}
|
|
}
|
|
)JSON");
|
|
|
|
const TSLanguage *language = load_compile_result("arithmetic", result);
|
|
|
|
ts_document_set_language(document, language);
|
|
ts_document_set_input_string(document, "a + b * c");
|
|
ts_document_parse(document);
|
|
|
|
assert_root_node(
|
|
"(expression (sum "
|
|
"(expression (variable)) "
|
|
"(expression (product "
|
|
"(expression (variable)) "
|
|
"(expression (variable))))))");
|
|
});
|
|
});
|
|
});
|
|
|
|
END_TEST
|