From 8a15da90fb0852149ad24a7895810436416f4f27 Mon Sep 17 00:00:00 2001
From: Phil Turnbull <philipturnbull@github.com>
Date: Tue, 20 Jun 2017 13:49:12 -0400
Subject: [PATCH 1/3] Update utf8proc dependency to v2.1

This includes JuliaLang/utf8proc#66, which fixes an out-of-bounds read when
parsing malformed UTF-8 characters.
---
 externals/utf8proc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/externals/utf8proc b/externals/utf8proc
index ec0daa50..40e60595 160000
--- a/externals/utf8proc
+++ b/externals/utf8proc
@@ -1 +1 @@
-Subproject commit ec0daa50bbedc36a0bada4a0f713eb9dc317d444
+Subproject commit 40e605959eb5cb90b2587fa88e3b661558fbc55a

From 8ee3f96960611920c9bd2f4f41bf7cfb4a2747bd Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Fri, 23 Jun 2017 12:08:50 -0700
Subject: [PATCH 2/3] Fix formatting of non-ascii unexpected characters

Signed-off-by: Philip Turnbull <philipturnbull@github.com>
---
 src/runtime/tree.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/runtime/tree.c b/src/runtime/tree.c
index 195b6260..5d8e4019 100644
--- a/src/runtime/tree.c
+++ b/src/runtime/tree.c
@@ -1,4 +1,5 @@
 #include <assert.h>
+#include <ctype.h>
 #include <limits.h>
 #include <stdbool.h>
 #include <string.h>
@@ -468,13 +469,15 @@ const TSExternalTokenState *ts_tree_last_external_token_state(const Tree *tree)
 static size_t ts_tree__write_char_to_string(char *s, size_t n, int32_t c) {
   if (c == 0)
     return snprintf(s, n, "EOF");
+  if (c == -1)
+    return snprintf(s, n, "INVALID");
   else if (c == '\n')
     return snprintf(s, n, "'\\n'");
   else if (c == '\t')
     return snprintf(s, n, "'\\t'");
   else if (c == '\r')
     return snprintf(s, n, "'\\r'");
-  else if (c < 128)
+  else if (0 < c && c < 128 && isprint(c))
     return snprintf(s, n, "'%c'", c);
   else
     return snprintf(s, n, "%d", c);

From f62ee5a0f38bff1131eeb3a2043b497f284726f2 Mon Sep 17 00:00:00 2001
From: Max Brunsfeld <maxbrunsfeld@gmail.com>
Date: Fri, 23 Jun 2017 12:09:16 -0700
Subject: [PATCH 3/3] Fix OOB reads at ends of chunks

Signed-off-by: Philip Turnbull <philipturnbull@github.com>
---
 src/runtime/lexer.c         |  8 +++++++-
 test/runtime/parser_test.cc | 13 +++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c
index 21ce2b96..96bc2d13 100644
--- a/src/runtime/lexer.c
+++ b/src/runtime/lexer.c
@@ -34,7 +34,13 @@ static void ts_lexer__get_chunk(Lexer *self) {
 static void ts_lexer__get_lookahead(Lexer *self) {
   uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start;
   const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
-  uint32_t size = self->chunk_size - position_in_chunk + 1;
+  uint32_t size = self->chunk_size - position_in_chunk;
+
+  if (size == 0) {
+    self->lookahead_size = 1;
+    self->data.lookahead = '\0';
+    return;
+  }
 
   if (self->input.encoding == TSInputEncodingUTF8) {
     int64_t lookahead_size = utf8proc_iterate(chunk, size, &self->data.lookahead);
diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc
index d9aee54a..0c7e30a3 100644
--- a/test/runtime/parser_test.cc
+++ b/test/runtime/parser_test.cc
@@ -187,6 +187,19 @@ describe("Parser", [&]() {
         AssertThat(ts_node_end_point(error), Equals<TSPoint>({2, 2}));
       });
     });
+
+    it("handles invalid UTF8 characters at EOF", [&]() {
+      char *string = (char *)malloc(1);
+      string[0] = '\xdf';
+
+      ts_document_set_language(document, load_real_language("javascript"));
+      ts_document_set_input_string_with_length(document, string, 1);
+      ts_document_parse(document);
+
+      free(string);
+
+      assert_root_node("(ERROR (UNEXPECTED INVALID))");
+    });
   });
 
   describe("handling extra tokens", [&]() {