From 8a15da90fb0852149ad24a7895810436416f4f27 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Tue, 20 Jun 2017 13:49:12 -0400 Subject: [PATCH 1/3] Update utf8proc dependency to v2.1 This includes JuliaLang/utf8proc#66, which fixes an out-of-bounds read when parsing malformed UTF-8 characters. --- externals/utf8proc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/externals/utf8proc b/externals/utf8proc index ec0daa50..40e60595 160000 --- a/externals/utf8proc +++ b/externals/utf8proc @@ -1 +1 @@ -Subproject commit ec0daa50bbedc36a0bada4a0f713eb9dc317d444 +Subproject commit 40e605959eb5cb90b2587fa88e3b661558fbc55a From 8ee3f96960611920c9bd2f4f41bf7cfb4a2747bd Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 23 Jun 2017 12:08:50 -0700 Subject: [PATCH 2/3] Fix formatting of non-ascii unexpected characters Signed-off-by: Philip Turnbull --- src/runtime/tree.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 195b6260..5d8e4019 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -468,13 +469,15 @@ const TSExternalTokenState *ts_tree_last_external_token_state(const Tree *tree) static size_t ts_tree__write_char_to_string(char *s, size_t n, int32_t c) { if (c == 0) return snprintf(s, n, "EOF"); + if (c == -1) + return snprintf(s, n, "INVALID"); else if (c == '\n') return snprintf(s, n, "'\\n'"); else if (c == '\t') return snprintf(s, n, "'\\t'"); else if (c == '\r') return snprintf(s, n, "'\\r'"); - else if (c < 128) + else if (0 < c && c < 128 && isprint(c)) return snprintf(s, n, "'%c'", c); else return snprintf(s, n, "%d", c); From f62ee5a0f38bff1131eeb3a2043b497f284726f2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 23 Jun 2017 12:09:16 -0700 Subject: [PATCH 3/3] Fix OOB reads at ends of chunks Signed-off-by: Philip Turnbull --- src/runtime/lexer.c | 8 +++++++- test/runtime/parser_test.cc | 13 
+++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index 21ce2b96..96bc2d13 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -34,7 +34,13 @@ static void ts_lexer__get_chunk(Lexer *self) { static void ts_lexer__get_lookahead(Lexer *self) { uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start; const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk; - uint32_t size = self->chunk_size - position_in_chunk + 1; + uint32_t size = self->chunk_size - position_in_chunk; + + if (size == 0) { + self->lookahead_size = 1; + self->data.lookahead = '\0'; + return; + } if (self->input.encoding == TSInputEncodingUTF8) { int64_t lookahead_size = utf8proc_iterate(chunk, size, &self->data.lookahead); diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc index d9aee54a..0c7e30a3 100644 --- a/test/runtime/parser_test.cc +++ b/test/runtime/parser_test.cc @@ -187,6 +187,19 @@ describe("Parser", [&]() { AssertThat(ts_node_end_point(error), Equals({2, 2})); }); }); + + it("handles invalid UTF8 characters at EOF", [&]() { + char *string = (char *)malloc(1); + string[0] = '\xdf'; + + ts_document_set_language(document, load_real_language("javascript")); + ts_document_set_input_string_with_length(document, string, 1); + ts_document_parse(document); + + free(string); + + assert_root_node("(ERROR (UNEXPECTED INVALID))"); + }); }); describe("handling extra tokens", [&]() {