From e7662c2213eff6243d72bb42bf8b88bb1db9452d Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Mon, 17 Jul 2017 13:57:10 -0700 Subject: [PATCH] Handle out-of-bound read in utf16_iterate Also simplify the test so we call `utf16_iterate` directly. Calling `utf16_iterate` via `SpyInput` and `ts_document_parse` doesn't seem to reliably trigger the problem using valgrind. valgrind also doesn't detect the problem if we use a string literal like: `utf16_iterate("", 1, &code_point);` --- src/runtime/utf16.c | 5 +++++ test/runtime/document_test.cc | 13 ------------- test/runtime/lexer_test.cc | 18 ++++++++++++++++++ 3 files changed, 23 insertions(+), 13 deletions(-) create mode 100644 test/runtime/lexer_test.cc diff --git a/src/runtime/utf16.c b/src/runtime/utf16.c index a8ae6bdd..050caf19 100644 --- a/src/runtime/utf16.c +++ b/src/runtime/utf16.c @@ -1,6 +1,11 @@ #include "runtime/utf16.h" int utf16_iterate(const uint8_t *string, size_t length, int32_t *code_point) { + if (length < 2) { + *code_point = -1; + return 0; + } + uint16_t *units = (uint16_t *)string; uint16_t unit = units[0]; diff --git a/test/runtime/document_test.cc b/test/runtime/document_test.cc index 7bfc8304..99d04a51 100644 --- a/test/runtime/document_test.cc +++ b/test/runtime/document_test.cc @@ -72,19 +72,6 @@ describe("Document", [&]() { "(array (true) (false))"); }); - it("handles truncated UTF16 data", [&]() { - char *content = reinterpret_cast<char *>(malloc(1)); - - spy_input->content = string((const char *)content, 1); - spy_input->encoding = TSInputEncodingUTF16; - - ts_document_set_input(document, spy_input->input()); - ts_document_invalidate(document); - ts_document_parse(document); - - free(content); - }); - it("allows columns to be measured in either bytes or characters", [&]() { const char16_t content[] = u"[true, false]"; spy_input->content = string((const char *)content, sizeof(content)); diff --git a/test/runtime/lexer_test.cc b/test/runtime/lexer_test.cc new file mode 100644 index 
00000000..b4f370f2 --- /dev/null +++ b/test/runtime/lexer_test.cc @@ -0,0 +1,18 @@ +#include "test_helper.h" +#include "runtime/utf16.h" + +START_TEST + +describe("Lexer", [&]() { + it("handles truncated UTF16 data", [&]() { + uint8_t *content = new uint8_t[1]; + *content = 'A'; + + int32_t code_point = 0; + utf16_iterate(content, 1, &code_point); + + delete[] content; + }); +}); + +END_TEST