tree-sitter/spec/runtime/parser_spec.cc

452 lines
14 KiB
C++
Raw Normal View History

#include "spec_helper.h"
2016-02-02 12:03:11 -08:00
#include "runtime/alloc.h"
#include "helpers/record_alloc.h"
#include "helpers/spy_input.h"
#include "helpers/load_language.h"
#include "helpers/record_alloc.h"
2014-09-10 18:49:53 -07:00
START_TEST
describe("Parser", [&]() {
TSDocument *doc;
2015-07-16 17:32:19 -07:00
SpyInput *input;
TSNode root;
size_t chunk_size;
2014-09-10 18:49:53 -07:00
before_each([&]() {
  // Start allocation recording before the document is created.
  record_alloc::start();

  input = nullptr;
  chunk_size = 3;
  doc = ts_document_new();
});
after_each([&]() {
  if (doc != nullptr) {
    ts_document_free(doc);
  }

  // `delete` on a null pointer is a no-op, so no explicit guard is required.
  delete input;

  // Every recorded allocation must have been released by this point.
  record_alloc::stop();
  AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
auto set_text = [&](const char *text) {
2015-07-16 17:32:19 -07:00
input = new SpyInput(text, chunk_size);
ts_document_set_input(doc, input->input());
2016-11-04 09:18:38 -07:00
ts_document_parse(doc);
2014-09-10 18:49:53 -07:00
root = ts_document_root_node(doc);
AssertThat(ts_node_end_byte(root), Equals(strlen(text)));
2015-07-16 17:32:19 -07:00
input->clear();
2014-09-10 18:49:53 -07:00
};
auto insert_text = [&](size_t position, string text) {
size_t prev_size = ts_node_end_byte(root);
2015-09-19 14:46:14 -07:00
ts_document_edit(doc, input->replace(position, 0, text));
ts_document_parse(doc);
2014-10-05 16:56:50 -07:00
2014-09-10 18:49:53 -07:00
root = ts_document_root_node(doc);
size_t new_size = ts_node_end_byte(root);
2014-10-05 16:56:50 -07:00
AssertThat(new_size, Equals(prev_size + text.size()));
2014-09-10 18:49:53 -07:00
};
auto delete_text = [&](size_t position, size_t length) {
size_t prev_size = ts_node_end_byte(root);
2015-09-19 14:46:14 -07:00
ts_document_edit(doc, input->replace(position, length, ""));
ts_document_parse(doc);
2014-10-05 16:56:50 -07:00
2014-09-10 18:49:53 -07:00
root = ts_document_root_node(doc);
size_t new_size = ts_node_end_byte(root);
2014-10-05 16:56:50 -07:00
AssertThat(new_size, Equals(prev_size - length));
2014-09-10 18:49:53 -07:00
};
auto replace_text = [&](size_t position, size_t length, string new_text) {
size_t prev_size = ts_node_end_byte(root);
2015-09-19 14:46:14 -07:00
ts_document_edit(doc, input->replace(position, length, new_text));
ts_document_parse(doc);
root = ts_document_root_node(doc);
size_t new_size = ts_node_end_byte(root);
AssertThat(new_size, Equals(prev_size - length + new_text.size()));
};
2016-02-02 12:03:11 -08:00
// Asserts that the string form of the document's current root node equals
// `expected`.
auto assert_root_node = [&](const string &expected) {
  char *node_string = ts_node_string(ts_document_root_node(doc), doc);

  // Copy into a std::string so the buffer can be freed before asserting.
  const string actual(node_string);
  ts_free(node_string);

  AssertThat(actual, Equals(expected));
};
2014-09-10 18:49:53 -07:00
describe("handling errors", [&]() {
// The error-handling specs parse with the JSON test language by default.
before_each([&]() {
  auto language = get_test_language("json");
  ts_document_set_language(doc, language);
});
// Returns the slice of the input's content covered by `node`'s byte range.
auto get_node_text = [&](TSNode node) {
  const size_t first_byte = ts_node_start_byte(node);
  const size_t byte_count = ts_node_end_byte(node) - first_byte;
  return input->content.substr(first_byte, byte_count);
};
describe("when there is an invalid substring right before a valid token", [&]() {
  it("computes the error node's size and position correctly", [&]() {
    set_text(" [123, @@@@@, true]");
    assert_root_node("(array (number) (ERROR (UNEXPECTED '@')) (true))");

    // The ERROR node spans both the comma and the run of invalid characters.
    TSNode error_node = ts_node_named_child(root, 1);
    AssertThat(ts_node_type(error_node, doc), Equals("ERROR"));
    AssertThat(get_node_text(error_node), Equals(", @@@@@"));
    AssertThat(ts_node_child_count(error_node), Equals<size_t>(2));

    TSNode comma_node = ts_node_child(error_node, 0);
    AssertThat(get_node_text(comma_node), Equals(","));

    TSNode garbage_node = ts_node_child(error_node, 1);
    AssertThat(get_node_text(garbage_node), Equals("@@@@@"));

    // The valid token following the error keeps its own type and text.
    TSNode following_node = ts_node_named_child(root, 2);
    AssertThat(ts_node_type(following_node, doc), Equals("true"));
    AssertThat(get_node_text(following_node), Equals("true"));
  });
});
describe("when there is an unexpected string in the middle of a token", [&]() {
  it("computes the error node's size and position correctly", [&]() {
    set_text(" [123, faaaaalse, true]");
    assert_root_node("(array (number) (ERROR (UNEXPECTED 'a')) (true))");

    TSNode error_node = ts_node_named_child(root, 1);
    AssertThat(ts_node_type(error_node, doc), Equals("ERROR"));
    AssertThat(ts_node_child_count(error_node), Equals<size_t>(2));

    TSNode comma_node = ts_node_child(error_node, 0);
    AssertThat(ts_node_type(comma_node, doc), Equals(","));
    AssertThat(get_node_text(comma_node), Equals(","));

    // The malformed token is reported as a nested ERROR covering all of it.
    TSNode garbage_node = ts_node_child(error_node, 1);
    AssertThat(ts_node_type(garbage_node, doc), Equals("ERROR"));
    AssertThat(get_node_text(garbage_node), Equals("faaaaalse"));

    // The token after the error starts right after the text that precedes it.
    TSNode final_node = ts_node_named_child(root, 2);
    AssertThat(ts_node_type(final_node, doc), Equals("true"));
    AssertThat(ts_node_start_byte(final_node), Equals(strlen(" [123, faaaaalse, ")));
  });
});
describe("when there is one unexpected token between two valid tokens", [&]() {
  it("computes the error node's size and position correctly", [&]() {
    set_text(" [123, true false, true]");

    assert_root_node(
      "(array (number) (true) (ERROR (false)) (true))");

    // Named children of the array, per the tree asserted above:
    //   0 = number, 1 = true, 2 = ERROR, 3 = true.
    TSNode error = ts_node_named_child(root, 2);
    AssertThat(ts_node_type(error, doc), Equals("ERROR"));
    AssertThat(get_node_text(error), Equals("false"));
    AssertThat(ts_node_child_count(error), Equals<size_t>(1));

    // Check the element that FOLLOWS the error (index 3). Index 1 is the
    // `true` that precedes it; it has the same type and text, so asserting on
    // it would pass without verifying anything about the node after the error.
    // The start-byte assertion pins the position so the two `true` nodes
    // cannot be confused.
    TSNode last = ts_node_named_child(root, 3);
    AssertThat(ts_node_type(last, doc), Equals("true"));
    AssertThat(get_node_text(last), Equals("true"));
    AssertThat(ts_node_start_byte(last), Equals(strlen(" [123, true false, ")));
  });
});
2016-09-03 22:46:14 -07:00
describe("when there is an unexpected string at the end of a token", [&]() {
  it("computes the error's size and position correctly", [&]() {
    // The string literal is cut short by a newline before its closing quote.
    set_text(" [123, \"hi\n, true]");

    const string expected_tree =
      "(array (number) (ERROR (UNEXPECTED '\\n')) (true))";
    assert_root_node(expected_tree);
  });
});
2016-09-03 22:46:14 -07:00
describe("when there is an unterminated error", [&]() {
  it("maintains a consistent tree", [&]() {
    // Switch to the JavaScript test language for this spec.
    auto language = get_test_language("javascript");
    ts_document_set_language(doc, language);

    // The block comment is never closed, so the input ends mid-token.
    set_text("a; /* b");

    const string expected_tree =
      "(ERROR (program (expression_statement (identifier))) (UNEXPECTED EOF))";
    assert_root_node(expected_tree);
  });
});
2014-09-10 18:49:53 -07:00
});
describe("handling extra tokens", [&]() {
2014-09-10 18:49:53 -07:00
// In the javascript example grammar, ASI works by using newlines as
// terminators in statements, but also as extra tokens.
2014-09-10 18:49:53 -07:00
// All extra-token specs parse with the JavaScript test language.
before_each([&]() {
  auto language = get_test_language("javascript");
  ts_document_set_language(doc, language);
});
describe("when the token appears as part of a grammar rule", [&]() {
  it("is incorporated into the tree", [&]() {
    // Here the trailing newline ends the statement.
    set_text("fn()\n");

    const string expected_tree =
      "(program (expression_statement (function_call (identifier) (arguments))))";
    assert_root_node(expected_tree);
  });
});
describe("when the token appears somewhere else", [&]() {
  it("is incorporated into the tree", [&]() {
    // The newline falls in the middle of a member-access chain.
    const char *source_text =
      "fn()\n"
      " .otherFn();";
    set_text(source_text);

    const string expected_tree =
      "(program (expression_statement (function_call "
        "(member_access "
          "(function_call (identifier) (arguments)) "
          "(identifier)) "
        "(arguments))))";
    assert_root_node(expected_tree);
  });
});
describe("when several extra tokens appear in a row", [&]() {
  it("is incorporated into the tree", [&]() {
    // Blank lines and a comment sit between the call and the member access.
    const char *source_text =
      "fn()\n\n"
      "// This is a comment"
      "\n\n"
      ".otherFn();";
    set_text(source_text);

    const string expected_tree =
      "(program (expression_statement (function_call "
        "(member_access "
          "(function_call (identifier) (arguments)) "
          "(comment) "
          "(identifier)) "
        "(arguments))))";
    assert_root_node(expected_tree);
  });
});
});
describe("editing", [&]() {
// All editing specs parse with the JavaScript test language.
before_each([&]() {
  auto language = get_test_language("javascript");
  ts_document_set_language(doc, language);
});
describe("inserting text", [&]() {
2014-10-14 23:12:26 -07:00
describe("creating new tokens near the end of the input", [&]() {
  it("updates the parse tree and re-reads only the changed portion of the text", [&]() {
    set_text("x * (100 + abc);");
    assert_root_node(
      "(program (expression_statement (math_op "
        "(identifier) "
        "(math_op (number) (identifier)))))");

    // Append a member access to the last identifier.
    const size_t insertion_point = strlen("x * (100 + abc");
    insert_text(insertion_point, ".d");

    assert_root_node(
      "(program (expression_statement (math_op "
        "(identifier) "
        "(math_op (number) (member_access (identifier) (identifier))))))");

    // Only the text around the edit should have been read from the input.
    const vector<string> expected_reads = vector<string>({ " + abc.d)" });
    AssertThat(input->strings_read, Equals(expected_reads));
  });
});
2014-09-10 18:49:53 -07:00
2014-10-14 23:12:26 -07:00
describe("creating new tokens near the beginning of the input", [&]() {
  it("updates the parse tree and re-reads only the changed portion of the input", [&]() {
    // Must be set before set_text, which passes it to the SpyInput it creates.
    chunk_size = 2;
    set_text("123 + 456 * (10 + x);");

    assert_root_node(
      "(program (expression_statement (math_op "
        "(number) "
        "(math_op (number) (math_op (number) (identifier))))))");

    const size_t insertion_point = strlen("123");
    insert_text(insertion_point, " || 5");

    assert_root_node(
      "(program (expression_statement (bool_op "
        "(number) "
        "(math_op "
          "(number) "
          "(math_op (number) (math_op (number) (identifier)))))))");

    // Only the text around the edit should have been read from the input.
    const vector<string> expected_reads = vector<string>({ "123 || 5 +" });
    AssertThat(input->strings_read, Equals(expected_reads));
  });
});
2014-10-14 23:12:26 -07:00
describe("introducing an error", [&]() {
  it("gives the error the right size", [&]() {
    auto language = get_test_language("javascript");
    ts_document_set_language(doc, language);

    set_text("var x = y;");
    assert_root_node(
      "(program (var_declaration (var_assignment "
        "(identifier) (identifier))))");

    // A dangling operator yields an ERROR node.
    const size_t operator_offset = strlen("var x = y");
    insert_text(operator_offset, " *");
    assert_root_node(
      "(program (var_declaration (var_assignment "
        "(identifier) (identifier)) (ERROR)))");

    // Supplying the right-hand operand removes the error again.
    const size_t operand_offset = strlen("var x = y *");
    insert_text(operand_offset, " z");
    assert_root_node(
      "(program (var_declaration (var_assignment "
        "(identifier) (math_op (identifier) (identifier)))))");
  });
});
describe("into the middle of an existing token", [&]() {
  it("updates the parse tree", [&]() {
    const string expected_tree =
      "(program (expression_statement (math_op (identifier) (number))))";

    set_text("abc * 123;");
    assert_root_node(expected_tree);

    // Splice text between the 'b' and the 'c' of the identifier.
    insert_text(strlen("ab"), "XYZ");

    // The tree shape is unchanged; only the identifier gets longer.
    assert_root_node(expected_tree);

    TSNode identifier_node = ts_node_named_descendant_for_char_range(root, 1, 1);
    AssertThat(ts_node_type(identifier_node, doc), Equals("identifier"));
    AssertThat(ts_node_end_byte(identifier_node), Equals(strlen("abXYZc")));
  });
});
describe("at the end of an existing token", [&]() {
  it("updates the parse tree", [&]() {
    const string expected_tree =
      "(program (expression_statement (math_op (identifier) (number))))";

    set_text("abc * 123;");
    assert_root_node(expected_tree);

    // Append text directly after the identifier's last character.
    insert_text(strlen("abc"), "XYZ");

    // The tree shape is unchanged; only the identifier gets longer.
    assert_root_node(expected_tree);

    TSNode identifier_node = ts_node_named_descendant_for_char_range(root, 1, 1);
    AssertThat(ts_node_type(identifier_node, doc), Equals("identifier"));
    AssertThat(ts_node_end_byte(identifier_node), Equals(strlen("abcXYZ")));
  });
});
describe("into a node containing a extra token", [&]() {
  it("updates the parse tree", [&]() {
    const string expected_tree =
      "(program (expression_statement (math_op "
        "(number) "
        "(comment) "
        "(identifier))))";

    set_text("123 *\n"
             "// a-comment\n"
             "abc;");
    assert_root_node(expected_tree);

    // Extend the identifier that follows the comment.
    const size_t insertion_point = strlen("123 *\n"
                                          "// a-comment\n"
                                          "abc");
    insert_text(insertion_point, "XYZ");

    // The tree keeps the same shape, including the comment node.
    assert_root_node(expected_tree);
  });
});
});
2014-09-10 18:49:53 -07:00
describe("deleting text", [&]() {
  describe("when a critical token is removed", [&]() {
    it("updates the parse tree, creating an error", [&]() {
      set_text("123 * 456; 789 * 123;");
      assert_root_node(
        "(program "
          "(expression_statement (math_op (number) (number))) "
          "(expression_statement (math_op (number) (number))))");

      // Remove the first '*' operator together with the space after it.
      const size_t operator_offset = strlen("123 ");
      delete_text(operator_offset, 2);

      // Only the first statement gains an error; the second is unchanged.
      assert_root_node(
        "(program "
          "(expression_statement (number) (ERROR (number))) "
          "(expression_statement (math_op (number) (number))))");
    });
  });
});
describe("replacing text", [&]() {
  it("does not try to re-use nodes that are within the edited region", [&]() {
    auto language = get_test_language("javascript");
    ts_document_set_language(doc, language);

    const string expected_tree =
      "(program (expression_statement (object (pair "
        "(identifier) (member_access (identifier) (identifier))))))";

    set_text("{ x: (b.c) };");
    assert_root_node(expected_tree);

    // Drop the parentheses around the member access.
    const size_t edit_offset = strlen("{ x: ");
    const size_t replaced_length = strlen("(b.c)");
    replace_text(edit_offset, replaced_length, "b.c");

    // The same tree is expected after the replacement.
    assert_root_node(expected_tree);
  });
});
it("updates the document's parse count", [&]() {
  auto language = get_test_language("javascript");
  ts_document_set_language(doc, language);

  // Nothing has been parsed yet.
  AssertThat(ts_document_parse_count(doc), Equals<size_t>(0));

  // set_text parses once ...
  set_text("{ x: (b.c) };");
  AssertThat(ts_document_parse_count(doc), Equals<size_t>(1));

  // ... and insert_text re-parses after the edit.
  insert_text(strlen("{ x"), "yz");
  AssertThat(ts_document_parse_count(doc), Equals<size_t>(2));
});
2014-09-10 18:49:53 -07:00
});
describe("lexing", [&]() {
// All lexing specs parse with the JavaScript test language.
before_each([&]() {
  auto language = get_test_language("javascript");
  ts_document_set_language(doc, language);
});
describe("handling tokens containing wildcard patterns (e.g. comments)", [&]() {
  it("terminates them at the end of the document", [&]() {
    // The line comment runs to the end of the input with no trailing newline.
    set_text("x; // this is a comment");
    assert_root_node("(program (expression_statement (identifier)) (comment))");

    TSNode comment_node = ts_node_named_child(root, 1);
    AssertThat(ts_node_start_byte(comment_node), Equals(strlen("x; ")));
    AssertThat(ts_node_end_byte(comment_node), Equals(strlen("x; // this is a comment")));
  });
});
it("recognizes UTF8 characters as single characters", [&]() {
  // 'ΩΩΩ — ΔΔ';
  set_text("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';");
  assert_root_node("(program (expression_statement (string)))");

  // The ASCII stand-in has one byte per character of the real text, so its
  // strlen equals the expected character count.
  AssertThat(ts_node_end_char(root), Equals(strlen("'OOO - DD';")));

  // The byte length is taken from the real multi-byte text.
  AssertThat(ts_node_end_byte(root), Equals(strlen("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';")));
});
});
2014-09-10 18:49:53 -07:00
});
END_TEST