Add spec for recognition of UTF8 characters

This commit is contained in:
Max Brunsfeld 2014-09-27 16:00:48 -07:00
parent 444188cb5f
commit 7988829c08
2 changed files with 23 additions and 4 deletions

View file

@ -10,6 +10,7 @@ static const char * spy_read(void *data, size_t *bytes_read) {
reader->position += result.size();
reader->strings_read.back() += result;
*bytes_read = result.size();
memset(reader->buffer, 0, reader->chunk_size);
memcpy(reader->buffer, result.data(), result.size());
return reader->buffer;
}

View file

@ -11,10 +11,12 @@ describe("Parser", [&]() {
TSDocument *doc;
SpyReader *reader;
TSNode *root;
size_t chunk_size;
before_each([&]() {
chunk_size = 3;
reader = nullptr;
doc = ts_document_make();
reader = NULL;
});
after_each([&]() {
@ -23,8 +25,12 @@ describe("Parser", [&]() {
delete reader;
});
auto set_chunk_size = [&](size_t size) {
chunk_size = size;
};
auto set_text = [&](const char *text) {
reader = new SpyReader(text, 3);
reader = new SpyReader(text, chunk_size);
ts_document_set_input(doc, reader->input);
root = ts_document_root_node(doc);
reader->clear();
@ -328,8 +334,6 @@ describe("Parser", [&]() {
describe("handling tokens containing wildcard patterns (e.g. comments)", [&]() {
it("terminates them at the end of the document", [&]() {
ts_document_set_language(doc, ts_language_arithmetic());
set_text("x # this is a comment");
AssertThat(ts_node_string(root), Equals("(DOCUMENT "
@ -344,6 +348,20 @@ describe("Parser", [&]() {
ts_node_release(comment);
});
});
it("recognizes UTF8 characters as single characters", [&]() {
// Inputs that return partial UTF8 characters are not yet supported
set_chunk_size(50);
// x # Ω — Δ
set_text("x # \u03A9 \u2014 \u0394");
AssertThat(ts_node_string(root), Equals("(DOCUMENT "
"(expression (variable) (comment)))"));
AssertThat(ts_node_size(root).chars, Equals(strlen("x # O - D")));
AssertThat(ts_node_size(root).bytes, Equals(strlen("x # \u03A9 \u2014 \u0394")));
});
});
});