diff --git a/src/runtime/utf16.c b/src/runtime/utf16.c
index a8ae6bdd..050caf19 100644
--- a/src/runtime/utf16.c
+++ b/src/runtime/utf16.c
@@ -1,6 +1,11 @@
 #include "runtime/utf16.h"
 
 int utf16_iterate(const uint8_t *string, size_t length, int32_t *code_point) {
+  if (length < 2) {
+    *code_point = -1;
+    return 0;
+  }
+
   uint16_t *units = (uint16_t *)string;
   uint16_t unit = units[0];
 
diff --git a/test/runtime/document_test.cc b/test/runtime/document_test.cc
index 7bfc8304..99d04a51 100644
--- a/test/runtime/document_test.cc
+++ b/test/runtime/document_test.cc
@@ -72,19 +72,6 @@ describe("Document", [&]() {
         "(array (true) (false))");
     });
 
-    it("handles truncated UTF16 data", [&]() {
-      char *content = reinterpret_cast<char *>(malloc(1));
-
-      spy_input->content = string((const char *)content, 1);
-      spy_input->encoding = TSInputEncodingUTF16;
-
-      ts_document_set_input(document, spy_input->input());
-      ts_document_invalidate(document);
-      ts_document_parse(document);
-
-      free(content);
-    });
-
     it("allows columns to be measured in either bytes or characters", [&]() {
       const char16_t content[] = u"[true, false]";
       spy_input->content = string((const char *)content, sizeof(content));
diff --git a/test/runtime/lexer_test.cc b/test/runtime/lexer_test.cc
new file mode 100644
index 00000000..b4f370f2
--- /dev/null
+++ b/test/runtime/lexer_test.cc
@@ -0,0 +1,18 @@
+#include "test_helper.h"
+#include "runtime/utf16.h"
+
+START_TEST
+
+describe("Lexer", [&]() {
+  it("handles truncated UTF16 data", [&]() {
+    uint8_t *content = new uint8_t[1];
+    *content = 'A';
+
+    int32_t code_point = 0;
+    utf16_iterate(content, 1, &code_point);
+
+    delete[] content;
+  });
+});
+
+END_TEST