Handle out-of-bounds read in utf16_iterate

Also simplify the test so that it calls `utf16_iterate` directly. Calling
`utf16_iterate` indirectly, via `SpyInput` and `ts_document_parse`, does not
reliably trigger the problem under valgrind.

valgrind also doesn't detect the problem if we use a string literal like:
  `utf16_iterate("", 1, &code_point);`
This commit is contained in:
Phil Turnbull 2017-07-17 13:57:10 -07:00
parent 035abc1e15
commit e7662c2213
3 changed files with 23 additions and 13 deletions

View file

@ -1,6 +1,11 @@
#include "runtime/utf16.h"
int utf16_iterate(const uint8_t *string, size_t length, int32_t *code_point) {
if (length < 2) {
*code_point = -1;
return 0;
}
uint16_t *units = (uint16_t *)string;
uint16_t unit = units[0];

View file

@ -72,19 +72,6 @@ describe("Document", [&]() {
"(array (true) (false))");
});
it("handles truncated UTF16 data", [&]() {
  // One byte is shorter than a single UTF-16 code unit, so this input is
  // truncated by construction.
  char *content = static_cast<char *>(malloc(1));

  // Give the byte a defined value before it is copied; otherwise the input
  // handed to the parser would be indeterminate.
  *content = 'A';

  spy_input->content = string(content, 1);
  spy_input->encoding = TSInputEncodingUTF16;

  // The test passes as long as parsing the truncated input completes.
  ts_document_set_input(document, spy_input->input());
  ts_document_invalidate(document);
  ts_document_parse(document);

  free(content);
});
it("allows columns to be measured in either bytes or characters", [&]() {
const char16_t content[] = u"[true, false]";
spy_input->content = string((const char *)content, sizeof(content));

View file

@ -0,0 +1,18 @@
#include "test_helper.h"
#include "runtime/utf16.h"
START_TEST

describe("Lexer", [&]() {
  // Calls utf16_iterate directly on a buffer that is shorter than one UTF-16
  // code unit. The buffer lives on the heap rather than in a string literal;
  // per the accompanying commit notes, valgrind does not flag the overrun
  // when a literal is used.
  it("handles truncated UTF16 data", [&]() {
    uint8_t *buffer = new uint8_t[1];
    buffer[0] = 'A';

    int32_t decoded = 0;
    utf16_iterate(buffer, 1, &decoded);

    delete[] buffer;
  });
});

END_TEST