diff --git a/src/runtime/utf16.c b/src/runtime/utf16.c index a8ae6bdd..050caf19 100644 --- a/src/runtime/utf16.c +++ b/src/runtime/utf16.c @@ -1,6 +1,11 @@ #include "runtime/utf16.h" int utf16_iterate(const uint8_t *string, size_t length, int32_t *code_point) { + if (length < 2) { + *code_point = -1; + return 0; + } + uint16_t *units = (uint16_t *)string; uint16_t unit = units[0]; diff --git a/test/helpers/spy_input.cc b/test/helpers/spy_input.cc index 9edaf554..ec370b80 100644 --- a/test/helpers/spy_input.cc +++ b/test/helpers/spy_input.cc @@ -12,8 +12,7 @@ static const size_t UTF8_MAX_CHAR_SIZE = 4; SpyInput::SpyInput(string content, size_t chars_per_chunk) : chars_per_chunk(chars_per_chunk), - buffer_size(UTF8_MAX_CHAR_SIZE * chars_per_chunk), - buffer(new char[buffer_size]), + buffer(nullptr), byte_offset(0), content(content), encoding(TSInputEncodingUTF8), @@ -57,12 +56,19 @@ const char * SpyInput::read(void *payload, uint32_t *bytes_read) { * This class stores its entire `content` in a contiguous buffer, but we want * to ensure that the code under test cannot accidentally read more than * `*bytes_read` bytes past the returned pointer. To make sure that this type - * of error does not fly, we copy the chunk into a zeroed-out buffer and + * of error does not fly, we allocate a separate buffer for each request and * return a reference to that buffer, rather than a pointer into the main - * content. + * content. The temporary buffer only fits `*bytes_read` bytes so valgrind + * can detect code reading too many bytes from the buffer. */ - memset(spy->buffer, 0, spy->buffer_size); - memcpy(spy->buffer, result.data(), byte_count); + delete[] spy->buffer; + if (byte_count) { + spy->buffer = new char[byte_count]; + memcpy(spy->buffer, result.data(), byte_count); + } else { + spy->buffer = nullptr; + } + return spy->buffer; } diff --git a/test/helpers/spy_input.h b/test/helpers/spy_input.h index 9e0ee8d1..2a15ad9b 100644 --- a/test/helpers/spy_input.h +++ b/test/helpers/spy_input.h @@ -13,7 +13,6 @@ struct SpyInputEdit { class SpyInput { uint32_t chars_per_chunk; - uint32_t buffer_size; char *buffer; uint32_t byte_offset; std::vector undo_stack; diff --git a/test/runtime/document_test.cc b/test/runtime/document_test.cc index 7bd85ebf..2030a1d1 100644 --- a/test/runtime/document_test.cc +++ b/test/runtime/document_test.cc @@ -74,6 +74,16 @@ describe("Document", [&]() { "(array (true) (false))"); }); + it("handles truncated UTF16 data", [&]() { + const char content[1] = { '\0' }; + spy_input->content = string(content, sizeof(content)); + spy_input->encoding = TSInputEncodingUTF16; + + ts_document_set_input(document, spy_input->input()); + ts_document_invalidate(document); + ts_document_parse(document); + }); + it("allows columns to be measured in either bytes or characters", [&]() { const char16_t content[] = u"[true, false]"; spy_input->content = string((const char *)content, sizeof(content));