From 035abc1e15e4f574eb6f794755f500d6f22f630a Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Mon, 17 Jul 2017 12:31:42 -0700 Subject: [PATCH] Add test for UTF16 out-of-bound read utf16_iterate does not check that 'length' is a multiple of two which leads to an out-of-bound read: ==105293== Conditional jump or move depends on uninitialised value(s) ==105293== at 0x54F014: utf16_iterate (utf16.c:7) ==105293== by 0x539251: string_iterate(TSInputEncoding, unsigned char const*, unsigned long, int*) (encoding_helpers.cc:15) ==105293== by 0x53939D: string_byte_for_character(TSInputEncoding, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, unsigned long, unsigned long) (encoding_helpers.cc:43) ==105293== by 0x507BAD: SpyInput::read(void*, unsigned int*) (spy_input.cc:47) ==105293== by 0x551049: ts_lexer__get_chunk (lexer.c:29) ==105293== by 0x5515C2: ts_lexer_start (lexer.c:152) ==105293== by 0x5469AB: parser(long,...)(long long) (parser.c:297) ==105293== by 0x547896: parser__get_lookahead (parser.c:439) ==105293== by 0x54B2DF: parser__advance (parser.c:1150) ==105293== by 0x54C2B6: parser_parse (parser.c:1348) ==105293== by 0x53F06F: ts_document_parse_with_options (document.c:136) ==105293== by 0x53EF4F: ts_document_parse (document.c:107) --- test/runtime/document_test.cc | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/runtime/document_test.cc b/test/runtime/document_test.cc index 99d04a51..7bfc8304 100644 --- a/test/runtime/document_test.cc +++ b/test/runtime/document_test.cc @@ -72,6 +72,19 @@ describe("Document", [&]() { "(array (true) (false))"); }); + it("handles truncated UTF16 data", [&]() { + char *content = reinterpret_cast<char *>(malloc(1)); + + spy_input->content = string((const char *)content, 1); + spy_input->encoding = TSInputEncodingUTF16; + + ts_document_set_input(document, spy_input->input()); + ts_document_invalidate(document); + ts_document_parse(document); + + free(content); + }); + it("allows columns to be measured in either 
bytes or characters", [&]() { const char16_t content[] = u"[true, false]"; spy_input->content = string((const char *)content, sizeof(content));