193 lines
5.9 KiB
C++
193 lines
5.9 KiB
C++
#include "spec_helper.h"
|
|
#include "runtime/alloc.h"
|
|
#include "helpers/load_language.h"
|
|
|
|
START_TEST
|
|
|
|
describe("compile_grammar", []() {
|
|
TSDocument *document;
|
|
|
|
// Setup hook: stores a document newly obtained from ts_document_make() in the
// shared `document` pointer that the examples below operate on.
before_each([&]() { document = ts_document_make(); });
|
|
|
|
// Teardown hook: hands the shared `document` back to ts_document_free().
after_each([&]() { ts_document_free(document); });
|
|
|
|
// Asserts that the string form of the document's current root node (as
// produced by ts_node_string) equals `expected_string`.
//
// The C string returned by ts_node_string() has to be released with
// ts_free(). It is copied into a `string` and freed *before* the assertion
// runs: a failing AssertThat is expected to throw (snowhouse's default
// failure handler does), which would otherwise skip the ts_free() call and
// leak the buffer.
auto assert_root_node = [&](const string &expected_string) {
  TSNode root_node = ts_document_root_node(document);
  char *node_string = ts_node_string(root_node, document);

  // Take an owned copy so the raw buffer can be released immediately.
  const string actual_string(node_string);
  ts_free(node_string);

  AssertThat(actual_string, Equals(expected_string));
};
|
|
|
|
describe("when the grammar's start symbol is a token", [&]() {
  it("parses the token", [&]() {
    // Grammar with a single rule that is just the literal string "the-value".
    const char *grammar_json = R"JSON(
      {
        "name": "one_token_language",
        "rules": {
          "first_rule": {"type": "STRING", "value": "the-value"}
        }
      }
    )JSON";

    // Compile the grammar and install the resulting language on the document.
    TSCompileResult result = ts_compile_grammar(grammar_json);
    const TSLanguage *language = load_language("one_token_language", result);
    ts_document_set_language(document, language);

    // Parse input consisting of exactly that token.
    ts_document_set_input_string(document, "the-value");
    ts_document_parse(document);

    assert_root_node("(first_rule)");
  });
});
|
|
|
|
describe("when the grammar's start symbol is blank", [&]() {
  it("parses the empty string", [&]() {
    // Grammar whose only rule is BLANK.
    const char *grammar_json = R"JSON(
      {
        "name": "blank_language",
        "rules": {
          "first_rule": {"type": "BLANK"}
        }
      }
    )JSON";

    // Compile the grammar and install the resulting language on the document.
    TSCompileResult result = ts_compile_grammar(grammar_json);
    const TSLanguage *language = load_language("blank_language", result);
    ts_document_set_language(document, language);

    // Parse the empty input; the root is still expected to be `first_rule`.
    ts_document_set_input_string(document, "");
    ts_document_parse(document);

    assert_root_node("(first_rule)");
  });
});
|
|
|
|
describe("when the grammar contains anonymous tokens with escaped characters", [&]() {
  it("escapes the escaped characters properly in the generated parser", [&]() {
    // The grammar is a raw string literal, so the backslash sequences below
    // reach ts_compile_grammar as written (as JSON escapes), not as C++
    // escapes.
    const char *grammar_json = R"JSON(
      {
        "name": "escaped_char_language",
        "rules": {
          "first_rule": {
            "type": "CHOICE",
            "members": [
              {"type": "STRING", "value": "\n"},
              {"type": "STRING", "value": "\r"},
              {"type": "STRING", "value": "'hello'"},
              {"type": "PATTERN", "value": "\\d+"}
            ]
          }
        }
      }
    )JSON";

    TSCompileResult result = ts_compile_grammar(grammar_json);
    const TSLanguage *language = load_language("escaped_char_language", result);
    ts_document_set_language(document, language);

    // Each input matches one member of the CHOICE, so every parse is expected
    // to yield the same root node. Inputs are parsed in this order on the
    // same document.
    const char *const inputs[] = {"1234", "\n", "'hello'"};
    for (const char *input : inputs) {
      ts_document_set_input_string(document, input);
      ts_document_parse(document);
      assert_root_node("(first_rule)");
    }
  });
});
|
|
|
|
describe("the grammar in the README", [&]() {
  it("parses the input in the README", [&]() {
    // Arithmetic grammar; the `//` comments inside the literal are part of
    // the text handed to ts_compile_grammar.
    const char *grammar_json = R"JSON(
      {
        "name": "arithmetic",

        // Things that can appear anywhere in the language, like comments
        // and whitespace, are expressed as 'extras'.
        "extras": [
          {"type": "PATTERN", "value": "\\s"},
          {"type": "SYMBOL", "name": "comment"}
        ],

        "rules": {

          // The first rule listed in the grammar becomes the 'start rule'.
          "expression": {
            "type": "CHOICE",
            "members": [
              {"type": "SYMBOL", "name": "sum"},
              {"type": "SYMBOL", "name": "product"},
              {"type": "SYMBOL", "name": "number"},
              {"type": "SYMBOL", "name": "variable"},
              {
                "type": "SEQ",
                "members": [
                  {"type": "STRING", "value": "("},

                  // Error recovery is controlled by wrapping rule subtrees
                  // in an 'ERROR' rule.
                  {
                    "type": "ERROR",
                    "content": {"type": "SYMBOL", "name": "expression"}
                  },

                  {"type": "STRING", "value": ")"}
                ]
              }
            ]
          },

          // Tokens like '+' and '*' are described directly within the
          // grammar's rules, as opposed to in a seperate lexer description.
          "sum": {
            "type": "PREC_LEFT",
            "value": 1,
            "content": {
              "type": "SEQ",
              "members": [
                {"type": "SYMBOL", "name": "expression"},
                {"type": "STRING", "value": "+"},
                {"type": "SYMBOL", "name": "expression"}
              ]
            }
          },

          // Ambiguities can be resolved at compile time by assigning precedence
          // values to rule subtrees.
          "product": {
            "type": "PREC_LEFT",
            "value": 2,
            "content": {
              "type": "SEQ",
              "members": [
                {"type": "SYMBOL", "name": "expression"},
                {"type": "STRING", "value": "*"},
                {"type": "SYMBOL", "name": "expression"}
              ]
            }
          },

          // Tokens can be specified using ECMAScript regexps.
          "number": {"type": "PATTERN", "value": "\\d+"},
          "comment": {"type": "PATTERN", "value": "#.*"},
          "variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
        }
      }
    )JSON";

    TSCompileResult result = ts_compile_grammar(grammar_json);
    ts_document_set_language(document, load_language("arithmetic", result));

    ts_document_set_input_string(document, "a + b * c");
    ts_document_parse(document);

    // `product` carries the higher precedence value (2 vs. 1 for `sum`), and
    // the expected tree nests `b * c` as the right operand of the sum.
    const string expected_tree =
      "(expression (sum "
        "(expression (variable)) "
        "(expression (product "
          "(expression (variable)) "
          "(expression (variable))))))";
    assert_root_node(expected_tree);
  });
});
|
|
});
|
|
|
|
END_TEST
|