Handle out-of-bounds read in utf16_iterate

Also simplify the test so that it calls `utf16_iterate` directly. Calling `utf16_iterate` indirectly via `SpyInput` and `ts_document_parse` doesn't seem to trigger the problem reliably under Valgrind. Valgrind also doesn't detect the problem if we pass a string literal, e.g. `utf16_iterate("", 1, &code_point);`, so the test uses a heap-allocated one-byte buffer instead.
2017-07-17 13:57:10 -07:00 · 2017-07-17 13:57:10 -07:00 · e7662c2213
commit e7662c2213
parent 035abc1e15
3 changed files with 23 additions and 13 deletions
--- a/src/runtime/utf16.c
+++ b/src/runtime/utf16.c
@ -1,6 +1,11 @@
 #include "runtime/utf16.h"

 int utf16_iterate(const uint8_t *string, size_t length, int32_t *code_point) {
+  if (length < 2) {
+    *code_point = -1;
+    return 0;
+  }
+
  uint16_t *units = (uint16_t *)string;
  uint16_t unit = units[0];

--- a/test/runtime/document_test.cc
+++ b/test/runtime/document_test.cc
@ -72,19 +72,6 @@ describe("Document", [&]() {
        "(array (true) (false))");
    });

-    it("handles truncated UTF16 data", [&]() {
-      char *content = reinterpret_cast<char*>(malloc(1));
-
-      spy_input->content = string((const char *)content, 1);
-      spy_input->encoding = TSInputEncodingUTF16;
-
-      ts_document_set_input(document, spy_input->input());
-      ts_document_invalidate(document);
-      ts_document_parse(document);
-
-      free(content);
-    });
-
    it("allows columns to be measured in either bytes or characters", [&]() {
      const char16_t content[] = u"[true, false]";
      spy_input->content = string((const char *)content, sizeof(content));
--- a/test/runtime/lexer_test.cc
+++ b/test/runtime/lexer_test.cc
@ -0,0 +1,18 @@
+#include "test_helper.h"
+#include "runtime/utf16.h"
+
+START_TEST
+
+describe("Lexer", [&]() {
+    it("handles truncated UTF16 data", [&]() {
+      uint8_t *content = new uint8_t[1];
+      *content = 'A';
+
+      int32_t code_point = 0;
+      utf16_iterate(content, 1, &code_point);
+
+      delete[] content;
+    });
+});
+
+END_TEST