2017-03-09 20:40:01 -08:00
|
|
|
#include "test_helper.h"
|
2016-02-02 12:03:11 -08:00
|
|
|
#include "runtime/alloc.h"
|
|
|
|
|
#include "helpers/record_alloc.h"
|
2016-01-15 11:19:24 -08:00
|
|
|
#include "helpers/spy_input.h"
|
2016-03-03 12:05:04 -08:00
|
|
|
#include "helpers/load_language.h"
|
2016-01-18 10:44:49 -08:00
|
|
|
#include "helpers/record_alloc.h"
|
2017-02-19 13:53:28 -08:00
|
|
|
#include "helpers/point_helpers.h"
|
2016-12-20 13:10:18 -08:00
|
|
|
#include "helpers/stderr_logger.h"
|
|
|
|
|
#include "helpers/dedent.h"
|
2014-09-10 18:49:53 -07:00
|
|
|
|
|
|
|
|
START_TEST
|
|
|
|
|
|
|
|
|
|
describe("Parser", [&]() {
|
2016-12-21 11:32:27 -08:00
|
|
|
TSDocument *document;
|
2015-07-16 17:32:19 -07:00
|
|
|
SpyInput *input;
|
2015-07-31 15:47:48 -07:00
|
|
|
TSNode root;
|
2014-09-27 16:00:48 -07:00
|
|
|
size_t chunk_size;
|
2014-09-10 18:49:53 -07:00
|
|
|
|
|
|
|
|
before_each([&]() {
  // Start allocation recording before the document exists; after_each
  // asserts that no recorded allocation is still outstanding.
  record_alloc::start();

  chunk_size = 3;
  input = nullptr;
  document = ts_document_new();
});
|
|
|
|
|
|
|
|
|
|
after_each([&]() {
  // Release everything the test created, then clear the pointers so a later
  // hook can never free them a second time.
  if (document) ts_document_free(document);
  document = nullptr;

  delete input;  // deleting a null pointer is a no-op, so no guard is needed
  input = nullptr;

  // Every allocation recorded since before_each must have been released.
  record_alloc::stop();
  AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
|
|
|
|
|
|
2016-12-20 13:10:18 -08:00
|
|
|
auto set_text = [&](string text) {
|
2015-07-16 17:32:19 -07:00
|
|
|
input = new SpyInput(text, chunk_size);
|
2016-12-21 11:32:27 -08:00
|
|
|
ts_document_set_input(document, input->input());
|
|
|
|
|
ts_document_parse(document);
|
2015-09-18 23:20:06 -07:00
|
|
|
|
2016-12-21 11:32:27 -08:00
|
|
|
root = ts_document_root_node(document);
|
2016-12-20 13:10:18 -08:00
|
|
|
AssertThat(ts_node_end_byte(root), Equals(text.size()));
|
2015-07-16 17:32:19 -07:00
|
|
|
input->clear();
|
2014-09-10 18:49:53 -07:00
|
|
|
};
|
|
|
|
|
|
2014-10-12 11:47:00 -07:00
|
|
|
auto replace_text = [&](size_t position, size_t length, string new_text) {
|
2015-12-03 22:59:27 -08:00
|
|
|
size_t prev_size = ts_node_end_byte(root);
|
2014-10-12 11:47:00 -07:00
|
|
|
|
2016-12-21 11:32:27 -08:00
|
|
|
ts_document_edit(document, input->replace(position, length, new_text));
|
|
|
|
|
ts_document_parse(document);
|
2014-10-12 11:47:00 -07:00
|
|
|
|
2016-12-21 11:32:27 -08:00
|
|
|
root = ts_document_root_node(document);
|
2015-12-03 22:59:27 -08:00
|
|
|
size_t new_size = ts_node_end_byte(root);
|
2014-10-12 11:47:00 -07:00
|
|
|
AssertThat(new_size, Equals(prev_size - length + new_text.size()));
|
|
|
|
|
};
|
|
|
|
|
|
2016-12-21 11:37:08 -08:00
|
|
|
// Inserts `text` at `position`: a replacement of zero existing bytes.
auto insert_text = [&](size_t position, string text) {
  replace_text(position, 0, text);
};
|
|
|
|
|
|
|
|
|
|
// Deletes `length` bytes starting at `position`: a replacement with the
// empty string.
auto delete_text = [&](size_t position, size_t length) {
  replace_text(position, length, "");
};
|
|
|
|
|
|
2017-01-07 21:45:28 -08:00
|
|
|
auto undo = [&]() {
|
|
|
|
|
ts_document_edit(document, input->undo());
|
|
|
|
|
ts_document_parse(document);
|
|
|
|
|
};
|
|
|
|
|
|
2016-02-02 12:03:11 -08:00
|
|
|
// Asserts that the string form of the document's current root node equals
// `expected`.
auto assert_root_node = [&](const string &expected) {
  TSNode node = ts_document_root_node(document);

  // Copy the C string into a std::string and free it before asserting, so
  // the buffer is released even when the assertion fails.
  char *node_string = ts_node_string(node, document);
  const string actual(node_string);
  ts_free(node_string);

  AssertThat(actual, Equals(expected));
};
|
|
|
|
|
|
2016-12-21 11:37:08 -08:00
|
|
|
auto get_node_text = [&](TSNode node) {
|
|
|
|
|
size_t start = ts_node_start_byte(node);
|
|
|
|
|
size_t end = ts_node_end_byte(node);
|
|
|
|
|
return input->content.substr(start, end - start);
|
|
|
|
|
};
|
2016-05-20 20:26:03 -07:00
|
|
|
|
2016-12-21 11:37:08 -08:00
|
|
|
describe("handling errors", [&]() {
|
2016-05-20 20:26:03 -07:00
|
|
|
describe("when there is an invalid substring right before a valid token", [&]() {
  it("computes the error node's size and position correctly", [&]() {
    ts_document_set_language(document, load_real_language("json"));
    set_text(" [123, @@@@@, true]");

    assert_root_node(
      "(value (array (number) (ERROR (UNEXPECTED '@')) (true)))");

    // The ERROR node is the array's second named child and is expected to
    // cover both the garbage characters and the comma that follows them.
    TSNode error = ts_node_named_child(ts_node_child(root, 0), 1);
    AssertThat(ts_node_type(error, document), Equals("ERROR"));
    AssertThat(get_node_text(error), Equals("@@@@@,"));
    AssertThat(ts_node_child_count(error), Equals<size_t>(2));

    TSNode garbage = ts_node_child(error, 0);
    AssertThat(get_node_text(garbage), Equals("@@@@@"));

    TSNode comma = ts_node_child(error, 1);
    AssertThat(get_node_text(comma), Equals(","));

    // The token after the error must still have the right type and extent.
    TSNode node_after_error = ts_node_next_named_sibling(error);
    AssertThat(ts_node_type(node_after_error, document), Equals("true"));
    AssertThat(get_node_text(node_after_error), Equals("true"));
  });
});
|
|
|
|
|
|
2016-05-20 20:26:03 -07:00
|
|
|
describe("when there is an unexpected string in the middle of a token", [&]() {
  it("computes the error node's size and position correctly", [&]() {
    ts_document_set_language(document, load_real_language("json"));
    set_text(" [123, faaaaalse, true]");

    assert_root_node(
      "(value (array (number) (ERROR (UNEXPECTED 'a')) (true)))");

    // The outer ERROR is the array's second named child; it is expected to
    // wrap a nested ERROR for the malformed word plus the trailing comma.
    TSNode error = ts_node_named_child(ts_node_child(root, 0), 1);
    AssertThat(ts_node_type(error, document), Equals("ERROR"));
    AssertThat(get_node_text(error), Equals("faaaaalse,"));
    AssertThat(ts_node_child_count(error), Equals<size_t>(2));

    TSNode garbage = ts_node_child(error, 0);
    AssertThat(ts_node_type(garbage, document), Equals("ERROR"));
    AssertThat(get_node_text(garbage), Equals("faaaaalse"));

    TSNode comma = ts_node_child(error, 1);
    AssertThat(ts_node_type(comma, document), Equals(","));
    AssertThat(get_node_text(comma), Equals(","));

    // The node after the error must start right after the skipped text.
    TSNode last = ts_node_next_named_sibling(error);
    AssertThat(ts_node_type(last, document), Equals("true"));
    AssertThat(ts_node_start_byte(last), Equals(strlen(" [123, faaaaalse, ")));
  });
});
|
|
|
|
|
|
2016-05-20 20:26:03 -07:00
|
|
|
describe("when there is one unexpected token between two valid tokens", [&]() {
  it("computes the error node's size and position correctly", [&]() {
    ts_document_set_language(document, load_real_language("json"));
    set_text(" [123, true false, true]");

    assert_root_node(
      "(value (array (number) (true) (ERROR (false)) (true)))");

    // Here the ERROR is the array's third named child and wraps only the
    // stray `false` token.
    TSNode error = ts_node_named_child(ts_node_child(root, 0), 2);
    AssertThat(ts_node_type(error, document), Equals("ERROR"));
    AssertThat(get_node_text(error), Equals("false"));
    AssertThat(ts_node_child_count(error), Equals<size_t>(1));

    TSNode last = ts_node_next_named_sibling(error);
    AssertThat(ts_node_type(last, document), Equals("true"));
    AssertThat(get_node_text(last), Equals("true"));
  });
});
|
2016-09-03 22:46:14 -07:00
|
|
|
|
2016-09-19 13:35:08 -07:00
|
|
|
describe("when there is an unexpected string at the end of a token", [&]() {
  it("computes the error's size and position correctly", [&]() {
    ts_document_set_language(document, load_real_language("json"));

    // The string literal is opened but hits a newline before it is closed.
    set_text(" [123, \"hi\n, true]");

    assert_root_node(
      "(value (array (number) (ERROR (UNEXPECTED '\\n')) (true)))");
  });
});
|
|
|
|
|
|
2016-09-03 22:46:14 -07:00
|
|
|
describe("when there is an unterminated error", [&]() {
  it("maintains a consistent tree", [&]() {
    ts_document_set_language(document, load_real_language("javascript"));

    // The quote opens a string that is still open when the input ends.
    set_text("a; ' this string never ends");

    assert_root_node(
      "(program (expression_statement (identifier)) (ERROR (UNEXPECTED EOF)))");
  });
});
|
2017-02-19 13:53:28 -08:00
|
|
|
|
|
|
|
|
describe("when there are extra tokens at the end of the viable prefix", [&]() {
  it("does not include them in the error node", [&]() {
    ts_document_set_language(document, load_real_language("javascript"));
    set_text(
      "var x;\n"
      "\n"
      "if\n"
      "\n"
      "var y;"
    );

    // The error must cover only the `if` on row 2 (columns 0-2), not the
    // blank lines around it.
    TSNode error = ts_node_named_child(root, 1);
    AssertThat(ts_node_type(error, document), Equals("ERROR"));
    AssertThat(ts_node_start_point(error), Equals<TSPoint>({2, 0}));
    AssertThat(ts_node_end_point(error), Equals<TSPoint>({2, 2}));
  });
});
|
2017-06-23 12:09:16 -07:00
|
|
|
|
|
|
|
|
it("handles invalid UTF8 characters at EOF", [&]() {
  // 0xDF is the lead byte of a two-byte UTF-8 sequence, so a buffer that
  // ends right after it is truncated mid-character.
  // NOTE(review): the one-byte heap buffer presumably exists so memory
  // checkers can flag any read past the end — confirm before changing it.
  char *invalid_input = static_cast<char *>(malloc(1));
  invalid_input[0] = '\xdf';

  ts_document_set_language(document, load_real_language("javascript"));
  ts_document_set_input_string_with_length(document, invalid_input, 1);
  ts_document_parse(document);

  free(invalid_input);

  assert_root_node("(program (ERROR (UNEXPECTED INVALID)))");
});
|
2014-09-10 18:49:53 -07:00
|
|
|
});
|
|
|
|
|
|
2015-12-17 15:50:48 -08:00
|
|
|
describe("handling extra tokens", [&]() {
|
2014-09-10 18:49:53 -07:00
|
|
|
describe("when the token appears as part of a grammar rule", [&]() {
  it("incorporates it into the tree", [&]() {
    ts_document_set_language(document, load_real_language("javascript"));
    set_text("fn()\n");

    assert_root_node(
      "(program (expression_statement (call_expression (identifier) (arguments))))");
  });
});
|
|
|
|
|
|
|
|
|
|
describe("when the token appears somewhere else", [&]() {
  it("incorporates it into the tree", [&]() {
    ts_document_set_language(document, load_real_language("javascript"));

    // The newline falls in the middle of a member-call chain.
    set_text(
      "fn()\n"
      " .otherFn();");

    assert_root_node(
      "(program (expression_statement (call_expression "
        "(member_expression "
          "(call_expression (identifier) (arguments)) "
          "(property_identifier)) "
        "(arguments))))");
  });
});
|
|
|
|
|
|
2015-12-17 15:50:48 -08:00
|
|
|
describe("when several extra tokens appear in a row", [&]() {
  it("incorporates them into the tree", [&]() {
    ts_document_set_language(document, load_real_language("javascript"));

    // Newlines and a comment sit between the call and the member access.
    set_text(
      "fn()\n\n"
      "// This is a comment"
      "\n\n"
      ".otherFn();");

    assert_root_node(
      "(program (expression_statement (call_expression "
        "(member_expression "
          "(call_expression (identifier) (arguments)) "
          "(comment) "
          "(property_identifier)) "
        "(arguments))))");
  });
});
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
describe("editing", [&]() {
|
2016-12-21 11:37:08 -08:00
|
|
|
describe("creating new tokens near the end of the input", [&]() {
  it("updates the parse tree and re-reads only the changed portion of the text", [&]() {
    ts_document_set_language(document, load_real_language("javascript"));
    set_text("x * (100 + abc);");

    assert_root_node(
      "(program (expression_statement (binary_expression "
        "(identifier) "
        "(parenthesized_expression "
          "(binary_expression (number) (identifier))))))");

    // Turn `abc` into the member expression `abc.d`.
    insert_text(strlen("x * (100 + abc"), ".d");

    assert_root_node(
      "(program (expression_statement (binary_expression "
        "(identifier) "
        "(parenthesized_expression "
          "(binary_expression (number) (member_expression (identifier) (property_identifier)))))))");

    AssertThat(input->strings_read(), Equals(vector<string>({
      // The '*' is not reused because the preceding `x` expression is reused, which
      // puts the parser into a different state than when the `*` was initially tokenized.
      // When the `*` was initially tokenized, `x` was just an identifier. In both of these
      // states, external tokens are valid so we don't reuse tokens unless the lex states
      // match. This could probably be improved somehow.
      " * ",
      " abc.d);"
    })));
  });
});
|
2014-09-10 18:49:53 -07:00
|
|
|
|
2016-12-21 11:37:08 -08:00
|
|
|
describe("creating new tokens near the beginning of the input", [&]() {
  it("updates the parse tree and re-reads only the changed portion of the input", [&]() {
    // Override the default chunk size before set_text builds the SpyInput;
    // the expected strings_read() values below depend on it.
    chunk_size = 2;

    ts_document_set_language(document, load_real_language("javascript"));
    set_text("123 + 456 * (10 + x);");

    assert_root_node(
      "(program (expression_statement (binary_expression "
        "(number) "
        "(binary_expression (number) (parenthesized_expression (binary_expression (number) (identifier)))))))");

    // Insert a new operator and operand right after the first number.
    insert_text(strlen("123"), " || 5");

    assert_root_node(
      "(program (expression_statement (binary_expression "
        "(number) "
        "(binary_expression "
          "(number) "
          "(binary_expression (number) (parenthesized_expression (binary_expression (number) (identifier))))))))");

    AssertThat(input->strings_read(), Equals(vector<string>({
      "123 || 5 ",
      ";"
    })));
  });
});
|
2014-10-14 23:12:26 -07:00
|
|
|
|
2016-12-21 11:37:08 -08:00
|
|
|
describe("introducing an error", [&]() {
  it("gives the error the right size", [&]() {
    ts_document_set_language(document, load_real_language("javascript"));
    set_text("var x = y;");

    assert_root_node(
      "(program (variable_declaration (variable_declarator "
        "(identifier) (identifier))))");

    // A dangling `*` leaves the expression incomplete, producing an ERROR.
    // insert_text itself checks that the tree's size tracks the edit.
    insert_text(strlen("var x = y"), " *");

    assert_root_node(
      "(program (variable_declaration (variable_declarator "
        "(identifier) (identifier)) (ERROR)))");

    // Supplying the right-hand operand makes the input valid again.
    insert_text(strlen("var x = y *"), " z");

    assert_root_node(
      "(program (variable_declaration (variable_declarator "
        "(identifier) (binary_expression (identifier) (identifier)))))");
  });
});
|
2014-09-10 18:49:53 -07:00
|
|
|
|
2016-12-21 11:37:08 -08:00
|
|
|
describe("into the middle of an existing token", [&]() {
  it("updates the parse tree", [&]() {
    ts_document_set_language(document, load_real_language("javascript"));
    set_text("abc * 123;");

    assert_root_node(
      "(program (expression_statement (binary_expression (identifier) (number))))");

    // `abc` becomes `abXYZc`: the tree shape stays the same, but the
    // identifier must grow to cover the inserted characters.
    insert_text(strlen("ab"), "XYZ");

    assert_root_node(
      "(program (expression_statement (binary_expression (identifier) (number))))");

    TSNode node = ts_node_named_descendant_for_byte_range(root, 1, 1);
    AssertThat(ts_node_type(node, document), Equals("identifier"));
    AssertThat(ts_node_end_byte(node), Equals(strlen("abXYZc")));
  });
});
|
2014-10-02 11:54:00 -07:00
|
|
|
|
2016-12-21 11:37:08 -08:00
|
|
|
describe("at the end of an existing token", [&]() {
  it("updates the parse tree", [&]() {
    ts_document_set_language(document, load_real_language("javascript"));
    set_text("abc * 123;");

    assert_root_node(
      "(program (expression_statement (binary_expression (identifier) (number))))");

    // `abc` becomes `abcXYZ`: the appended characters must extend the
    // existing identifier rather than form a new token.
    insert_text(strlen("abc"), "XYZ");

    assert_root_node(
      "(program (expression_statement (binary_expression (identifier) (number))))");

    TSNode node = ts_node_named_descendant_for_byte_range(root, 1, 1);
    AssertThat(ts_node_type(node, document), Equals("identifier"));
    AssertThat(ts_node_end_byte(node), Equals(strlen("abcXYZ")));
  });
});
|
2014-09-10 18:49:53 -07:00
|
|
|
|
2016-12-21 11:37:08 -08:00
|
|
|
describe("inserting text into a node containing a extra token", [&]() {
  it("updates the parse tree", [&]() {
    ts_document_set_language(document, load_real_language("javascript"));
    set_text("123 *\n"
      "// a-comment\n"
      "abc;");

    assert_root_node(
      "(program (expression_statement (binary_expression "
        "(number) "
        "(comment) "
        "(identifier))))");

    // Append to the identifier that follows the comment; the tree shape
    // (including the comment's position) must not change.
    insert_text(
      strlen("123 *\n"
        "// a-comment\n"
        "abc"),
      "XYZ");

    assert_root_node(
      "(program (expression_statement (binary_expression "
        "(number) "
        "(comment) "
        "(identifier))))");
  });
});
|
|
|
|
|
|
2016-12-21 11:37:08 -08:00
|
|
|
describe("when a critical token is removed", [&]() {
  it("updates the parse tree, creating an error", [&]() {
    ts_document_set_language(document, load_real_language("javascript"));
    set_text("123 * 456; 789 * 123;");

    assert_root_node(
      "(program "
        "(expression_statement (binary_expression (number) (number))) "
        "(expression_statement (binary_expression (number) (number))))");

    // Delete the two bytes "* ", leaving "123 456;" as the first statement.
    delete_text(strlen("123 "), 2);

    assert_root_node(
      "(program "
        "(ERROR (number)) "
        "(expression_statement (number)) "
        "(expression_statement (binary_expression (number) (number))))");
  });
});
|
2015-09-10 14:23:42 -07:00
|
|
|
|
2016-12-20 13:10:18 -08:00
|
|
|
describe("with external tokens", [&]() {
  it("maintains the external scanner's state during incremental parsing", [&]() {
    ts_document_set_language(document, load_real_language("python"));

    // NOTE(review): the relative indentation inside this raw string is
    // reconstructed. The first expected tree requires `print b` to be nested
    // in the `if` and `return c` to be a sibling of it; `print b` is indented
    // by exactly one space so that the one-space insertion below lines
    // `return c` up with it. Confirm against the project's canonical fixture.
    string text = dedent(R"PYTHON(
      if a:
       print b
      return c
    )PYTHON");

    set_text(text);
    assert_root_node("(module "
      "(if_statement (identifier) "
        "(print_statement (identifier))) "
      "(return_statement (expression_list (identifier))))");

    // Indenting `return` moves it inside the `if` body.
    replace_text(text.find("return"), 0, " ");
    assert_root_node("(module "
      "(if_statement (identifier) "
        "(print_statement (identifier)) "
        "(return_statement (expression_list (identifier)))))");

    // Undoing the edit must restore the original structure.
    undo();
    assert_root_node("(module "
      "(if_statement (identifier) "
        "(print_statement (identifier))) "
      "(return_statement (expression_list (identifier))))");
  });
});
|
|
|
|
|
|
2017-08-03 16:32:39 -07:00
|
|
|
it("does not try to reuse nodes that are within the edited region", [&]() {
  ts_document_set_language(document, load_real_language("javascript"));
  set_text("{ x: (b.c) };");

  assert_root_node(
    "(program (expression_statement (object (pair "
      "(property_identifier) (parenthesized_expression (member_expression (identifier) (property_identifier)))))))");

  // Strip the parentheses: the member expression's text is unchanged, but
  // it lies inside the replaced range.
  replace_text(strlen("{ x: "), strlen("(b.c)"), "b.c");

  assert_root_node(
    "(program (expression_statement (object (pair "
      "(property_identifier) (member_expression (identifier) (property_identifier))))))");
});
|
|
|
|
|
|
2015-12-02 07:53:15 -08:00
|
|
|
it("updates the document's parse count", [&]() {
  ts_document_set_language(document, load_real_language("javascript"));
  AssertThat(ts_document_parse_count(document), Equals<size_t>(0));

  // set_text and insert_text each trigger exactly one parse.
  set_text("{ x: (b.c) };");
  AssertThat(ts_document_parse_count(document), Equals<size_t>(1));

  insert_text(strlen("{ x"), "yz");
  AssertThat(ts_document_parse_count(document), Equals<size_t>(2));
});
|
2014-09-10 18:49:53 -07:00
|
|
|
});
|
2014-09-11 13:10:23 -07:00
|
|
|
|
|
|
|
|
describe("lexing", [&]() {
|
|
|
|
|
describe("handling tokens containing wildcard patterns (e.g. comments)", [&]() {
  it("terminates them at the end of the document", [&]() {
    ts_document_set_language(document, load_real_language("javascript"));

    // The comment has no trailing newline, so it runs up to end of input.
    set_text("x; // this is a comment");

    assert_root_node(
      "(program (expression_statement (identifier)) (comment))");

    TSNode comment = ts_node_named_child(root, 1);

    AssertThat(ts_node_start_byte(comment), Equals(strlen("x; ")));
    AssertThat(ts_node_end_byte(comment), Equals(strlen("x; // this is a comment")));
  });
});
|
2014-09-27 16:00:48 -07:00
|
|
|
|
|
|
|
|
it("recognizes UTF8 characters as single characters", [&]() {
  // 'ΩΩΩ — ΔΔ';
  ts_document_set_language(document, load_real_language("javascript"));
  set_text("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';");

  assert_root_node(
    "(program (expression_statement (string)))");

  // End positions are byte offsets, so the root must span the encoded
  // length of the text (what strlen reports), not its character count.
  AssertThat(ts_node_end_byte(root), Equals(strlen("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';")));
});
|
2017-03-21 09:58:35 -07:00
|
|
|
|
|
|
|
|
it("handles non-UTF8 characters", [&]() {
  // Input containing bytes that do not form valid UTF-8. Locals are named
  // so they do not shadow the `string` type or the fixture's `root`.
  const char *source_text = "cons\xeb\x00e=ls\x83l6hi');\x0a";

  ts_document_set_language(document, load_real_language("javascript"));
  ts_document_set_input_string(document, source_text);
  ts_document_parse(document);

  // Only the extent is checked: the parse must consume the whole input.
  TSNode parsed_root = ts_document_root_node(document);
  AssertThat(ts_node_end_byte(parsed_root), Equals(strlen(source_text)));
});
|
2014-09-11 13:10:23 -07:00
|
|
|
});
|
2014-09-10 18:49:53 -07:00
|
|
|
});
|
|
|
|
|
|
|
|
|
|
END_TEST
|