tree-sitter/spec/runtime/document_spec.cc
Max Brunsfeld 535879a2bd Represent byte, char and tree counts as 32 bit numbers
The parser spends the majority of its time allocating and freeing trees and stack nodes.
Also, the memory footprint of the AST is a significant concern when using tree-sitter
with large files. This library is already unlikely to work very well with source files
larger than 4GB, so representing rows, columns, byte lengths and child indices as
unsigned 32 bit integers seems like the right choice.
2016-11-14 12:19:13 -08:00

358 lines
11 KiB
C++

#include "spec_helper.h"
#include "runtime/alloc.h"
#include "helpers/record_alloc.h"
#include "helpers/stream_methods.h"
#include "helpers/tree_helpers.h"
#include "helpers/point_helpers.h"
#include "helpers/spy_logger.h"
#include "helpers/spy_input.h"
#include "helpers/load_language.h"
// Builds a TSPoint from a (row, column) pair. Both coordinates arrive as
// size_t (e.g. from std::string::find) and are explicitly narrowed to
// uint32_t before being placed in the aggregate.
TSPoint point(size_t row, size_t column) {
  const auto narrowed_row = static_cast<uint32_t>(row);
  const auto narrowed_column = static_cast<uint32_t>(column);
  return TSPoint{narrowed_row, narrowed_column};
}
START_TEST
describe("Document", [&]() {
// State shared by every spec in this group: the document under test and a
// scratch node handle that individual specs assign the parsed root to.
TSDocument *doc;
TSNode root;
// Begin allocation recording first and only then create the document --
// presumably so the document's own allocations are covered by the leak check
// performed in after_each.
before_each([&]() {
record_alloc::start();
doc = ts_document_new();
});
// Release the document, stop recording, and fail the spec if any recorded
// allocation is still outstanding.
after_each([&]() {
ts_document_free(doc);
record_alloc::stop();
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
// Asserts that the string form of `node` (as produced by ts_node_string for
// the current document) equals `expected`. The C string is copied into a
// std::string and released with ts_free before the assertion runs, so the
// buffer is freed whether or not the assertion passes.
auto assert_node_string_equals = [&](TSNode node, const string &expected) {
  char *raw_text = ts_node_string(node, doc);
  string rendered = raw_text;
  ts_free(raw_text);
  AssertThat(rendered, Equals(expected));
};
describe("set_input(input)", [&]() {
// Input spy shared by the set_input specs; a fresh one is created before
// each spec and destroyed afterwards.
SpyInput *spy_input;

before_each([&]() {
  spy_input = new SpyInput("{\"key\": [null, 2]}", 3);

  // Parse an initial JSON document from a plain string (not from the spy) so
  // that each spec starts with an existing tree.
  ts_document_set_language(doc, get_test_language("json"));
  ts_document_set_input_string(doc, "{\"key\": [1, 2]}");
  ts_document_parse(doc);

  root = ts_document_root_node(doc);
  const string initial_tree =
    "(object (pair (string) (array (number) (number))))";
  assert_node_string_equals(root, initial_tree);
});

after_each([&]() {
  delete spy_input;
});
it("handles both UTF8 and UTF16 encodings", [&]() {
  // Give the spy the raw bytes of a UTF-16 literal. sizeof covers the whole
  // array, so the terminating null code unit is part of the content too.
  const char16_t utf16_source[] = u"[true, false]";
  const char *source_bytes = reinterpret_cast<const char *>(utf16_source);
  spy_input->content = string(source_bytes, sizeof(utf16_source));
  spy_input->encoding = TSInputEncodingUTF16;

  // Swap the input in, invalidate the document and re-parse.
  ts_document_set_input(doc, spy_input->input());
  ts_document_invalidate(doc);
  ts_document_parse(doc);

  root = ts_document_root_node(doc);
  const string expected_tree = "(array (true) (false))";
  assert_node_string_equals(root, expected_tree);
});
it("allows columns to be measured in either bytes or characters", [&]() {
  // Same UTF-16 setup as the encoding spec: raw bytes of the literal,
  // including its terminating null code unit.
  const char16_t utf16_source[] = u"[true, false]";
  const char *source_bytes = reinterpret_cast<const char *>(utf16_source);
  spy_input->content = string(source_bytes, sizeof(utf16_source));
  spy_input->encoding = TSInputEncodingUTF16;

  // NOTE(review): the original left only a commented-out reference to
  // `spy_input->measure_columns_in_bytes` at this point, and the spec makes
  // no assertions -- it looks unfinished. As written it only checks that the
  // parse completes.
  ts_document_set_input(doc, spy_input->input());
  ts_document_invalidate(doc);
  ts_document_parse(doc);
});
it("allows the input to be retrieved later", [&]() {
  ts_document_set_input(doc, spy_input->input());

  // The document must hand back the same payload pointer...
  auto stored_payload = ts_document_input(doc).payload;
  AssertThat(stored_payload, Equals<void *>(spy_input));

  // ...and the same read and seek callbacks that the spy supplies.
  auto stored_read = ts_document_input(doc).read;
  AssertThat(stored_read, Equals(spy_input->input().read));

  auto stored_seek = ts_document_input(doc).seek;
  AssertThat(stored_seek, Equals(spy_input->input().seek));
});
it("does not assume that the document's text has changed", [&]() {
  ts_document_set_input(doc, spy_input->input());

  // Replacing the input alone must leave the existing tree in place and
  // unmarked.
  const TSNode current_root = ts_document_root_node(doc);
  AssertThat(current_root, Equals<TSNode>(root));
  AssertThat(ts_node_has_changes(root), IsFalse());

  // The spy's read log must still be the single empty string.
  const vector<string> expected_reads({ "" });
  AssertThat(spy_input->strings_read, Equals(expected_reads));
});
it("reads text from the new input for future parses", [&]() {
  ts_document_set_input(doc, spy_input->input());

  // Describe how the spy's content differs from the text parsed so far:
  // directly after the array's opening bracket, 'null' (4 bytes) is inserted
  // and '1' (1 byte) is deleted. Each column value mirrors the corresponding
  // byte count.
  const char *unchanged_prefix = "{\"key\": [";
  TSInputEdit edit = {};
  edit.start_byte = strlen(unchanged_prefix);
  edit.start_point.column = edit.start_byte;
  edit.bytes_added = 4;
  edit.extent_added.column = edit.bytes_added;
  edit.bytes_removed = 1;
  edit.extent_removed.column = edit.bytes_removed;

  ts_document_edit(doc, edit);
  ts_document_parse(doc);

  const TSNode new_root = ts_document_root_node(doc);
  const string expected_tree =
    "(object (pair (string) (array (null) (number))))";
  assert_node_string_equals(new_root, expected_tree);

  // Only the text around the edit should have been read from the spy.
  const vector<string> expected_reads({" [null, 2"});
  AssertThat(spy_input->strings_read, Equals(expected_reads));
});
it("reads from the new input correctly when the old input was blank", [&]() {
  // First parse an empty string: the root ends at character 0 and is an
  // ERROR node.
  ts_document_set_input_string(doc, "");
  ts_document_parse(doc);
  TSNode blank_root = ts_document_root_node(doc);
  AssertThat(ts_node_end_char(blank_root), Equals<size_t>(0));
  assert_node_string_equals(blank_root, "(ERROR)");

  // Then switch to a one-character document: the new text must be picked up.
  ts_document_set_input_string(doc, "1");
  ts_document_parse(doc);
  TSNode number_root = ts_document_root_node(doc);
  AssertThat(ts_node_end_char(number_root), Equals<size_t>(1));
  assert_node_string_equals(number_root, "(number)");
});
});
describe("set_language(language)", [&]() {
  // Every spec starts with the same source text and no language selected.
  before_each([&]() {
    ts_document_set_input_string(doc, "{\"key\": [1, 2]}\n");
  });

  it("uses the given language for future parses", [&]() {
    ts_document_set_language(doc, get_test_language("json"));
    ts_document_parse(doc);

    root = ts_document_root_node(doc);
    const string json_tree =
      "(object (pair (string) (array (number) (number))))";
    assert_node_string_equals(root, json_tree);
  });

  it("clears out any previous tree", [&]() {
    // Parse once as JSON so that a tree exists.
    ts_document_set_language(doc, get_test_language("json"));
    ts_document_parse(doc);

    // Switching language must drop that tree before any re-parse happens.
    ts_document_set_language(doc, get_test_language("javascript"));
    AssertThat(ts_document_root_node(doc).data, Equals<void *>(nullptr));

    // Re-parsing yields a tree in the new language's grammar.
    ts_document_parse(doc);
    root = ts_document_root_node(doc);
    const string javascript_tree =
      "(program (expression_statement "
      "(object (pair (string) (array (number) (number))))))";
    assert_node_string_equals(root, javascript_tree);
  });
});
describe("set_logger(TSLogger)", [&]() {
  // Logger spy shared by the specs in this group; recreated for each spec.
  SpyLogger *logger;

  before_each([&]() {
    logger = new SpyLogger();
    ts_document_set_language(doc, get_test_language("json"));
    ts_document_set_input_string(doc, "[1, 2]");
  });

  after_each([&]() {
    delete logger;
  });

  it("calls the debugger with a message for each lex action", [&]() {
    ts_document_set_logger(doc, logger->logger());
    ts_document_parse(doc);

    const auto &recorded = logger->messages;
    AssertThat(recorded, Contains("lookahead char:'1'"));
    AssertThat(recorded, Contains("lookahead char:'['"));
  });

  it("calls the debugger with a message for each parse action", [&]() {
    ts_document_set_logger(doc, logger->logger());
    ts_document_parse(doc);

    const auto &recorded = logger->messages;
    AssertThat(recorded, Contains("new_parse"));
    AssertThat(recorded, Contains("lookahead char:'['"));
    AssertThat(recorded, Contains("reduce sym:array, child_count:4"));
    AssertThat(recorded, Contains("accept"));
  });

  it("allows the debugger to be retrieved later", [&]() {
    ts_document_set_logger(doc, logger->logger());
    AssertThat(ts_document_logger(doc).payload, Equals(logger));
  });

  describe("disabling debugging", [&]() {
    // Install the spy and then immediately replace it with an all-null
    // logger.
    before_each([&]() {
      ts_document_set_logger(doc, logger->logger());
      ts_document_set_logger(doc, {nullptr, nullptr});
    });

    it("does not call the debugger any more", [&]() {
      ts_document_parse(doc);
      AssertThat(logger->messages, IsEmpty());
    });
  });
});
describe("parse_and_get_changed_ranges()", [&]() {
// Editable input shared by the changed-range specs; recreated for each spec.
SpyInput *input;

before_each([&]() {
  ts_document_set_language(doc, get_test_language("javascript"));

  input = new SpyInput("{a: null};", 3);
  ts_document_set_input(doc, input->input());
  ts_document_parse(doc);

  // Confirm the starting tree so that later range assertions are relative to
  // a known state.
  const string initial_tree =
    "(program (expression_statement (object (pair (identifier) (null)))))";
  assert_node_string_equals(ts_document_root_node(doc), initial_tree);
});

after_each([&]() {
  delete input;
});
// Applies the edit returned by `callback` to the document, re-parses it, and
// returns a copy of the changed ranges reported by
// ts_document_parse_and_get_changed_ranges.
//
// `callback` is expected to mutate the spy input and return the matching
// TSInputEdit. The range buffer received from the call is released with
// ts_free after its contents have been copied.
auto get_ranges = [&](std::function<TSInputEdit()> callback) -> vector<TSRange> {
  TSInputEdit edit = callback();
  ts_document_edit(doc, edit);

  // Give both out-parameters defined values up front, so that the copy and
  // the ts_free below never operate on an indeterminate pointer if the call
  // returns without writing them.
  TSRange *ranges = nullptr;
  uint32_t range_count = 0;
  ts_document_parse_and_get_changed_ranges(doc, &ranges, &range_count);

  // Copy the reported ranges before the buffer is released.
  vector<TSRange> result(ranges, ranges + range_count);
  ts_free(ranges);
  return result;
};
it("reports changes when one token has been updated", [&]() {
  // Rewrite the `null` token: one character is replaced at the offset of
  // "ull" with "othing" (assuming replace(offset, removed_length, text)), so
  // the token now begins with `nothing`.
  auto edited_ranges = get_ranges([&]() {
    return input->replace(input->content.find("ull"), 1, "othing");
  });
  const TSPoint edited_start = point(0, input->content.find("nothing"));
  const TSPoint edited_end = point(0, input->content.find("}"));
  AssertThat(edited_ranges, Equals(vector<TSRange>({
    TSRange{edited_start, edited_end},
  })));

  // Undo the edit, restoring `null`.
  auto restored_ranges = get_ranges([&]() {
    return input->undo();
  });
  const TSPoint restored_start = point(0, input->content.find("null"));
  const TSPoint restored_end = point(0, input->content.find("}"));
  AssertThat(restored_ranges, Equals(vector<TSRange>({
    TSRange{restored_start, restored_end},
  })));
});
it("reports changes when tokens have been appended", [&]() {
  // Step 1: add a second key-value pair just before the closing brace.
  auto ranges = get_ranges([&]() {
    return input->replace(input->content.find("}"), 0, ", b: false");
  });
  const TSPoint second_pair_start = point(0, input->content.find(","));
  const TSPoint second_pair_end = point(0, input->content.find("}"));
  AssertThat(ranges, Equals(vector<TSRange>({
    TSRange{second_pair_start, second_pair_end},
  })));

  // Step 2: add a third key-value pair in between the first two.
  ranges = get_ranges([&]() {
    return input->replace(input->content.find(", b"), 0, ", c: 1");
  });
  const string three_pair_tree =
    "(program (expression_statement (object "
    "(pair (identifier) (null)) "
    "(pair (identifier) (number)) "
    "(pair (identifier) (false)))))";
  assert_node_string_equals(ts_document_root_node(doc), three_pair_tree);
  const TSPoint middle_pair_start = point(0, input->content.find(", c"));
  const TSPoint middle_pair_end = point(0, input->content.find(", b"));
  AssertThat(ranges, Equals(vector<TSRange>({
    TSRange{middle_pair_start, middle_pair_end},
  })));

  // Both undo steps below are expected to report no changed ranges.
  const vector<TSRange> no_ranges;

  // Step 3: delete the middle pair.
  ranges = get_ranges([&]() {
    return input->undo();
  });
  const string two_pair_tree =
    "(program (expression_statement (object "
    "(pair (identifier) (null)) "
    "(pair (identifier) (false)))))";
  assert_node_string_equals(ts_document_root_node(doc), two_pair_tree);
  AssertThat(ranges, Equals(no_ranges));

  // Step 4: delete the second pair.
  ranges = get_ranges([&]() {
    return input->undo();
  });
  const string one_pair_tree =
    "(program (expression_statement (object "
    "(pair (identifier) (null)))))";
  assert_node_string_equals(ts_document_root_node(doc), one_pair_tree);
  AssertThat(ranges, Equals(no_ranges));
});
it("reports changes when trees have been wrapped", [&]() {
  // Insert `b === ` in front of `null`, so that the existing (null) node
  // becomes an operand of a new rel_op node.
  auto ranges = get_ranges([&]() {
    return input->replace(input->content.find("null"), 0, "b === ");
  });

  const string wrapped_tree =
    "(program (expression_statement (object "
    "(pair (identifier) (rel_op (identifier) (null))))))";
  assert_node_string_equals(ts_document_root_node(doc), wrapped_tree);

  const TSPoint wrapped_start = point(0, input->content.find("b ==="));
  const TSPoint wrapped_end = point(0, input->content.find("}"));
  AssertThat(ranges, Equals(vector<TSRange>({
    TSRange{wrapped_start, wrapped_end},
  })));
});
});
});
END_TEST