Merge pull request #78 from philipturnbull/update-utf8proc

Out of bounds read in utf8proc
This commit is contained in:
Max Brunsfeld 2017-06-23 12:18:21 -07:00 committed by GitHub
commit 076002a01e
4 changed files with 25 additions and 3 deletions

2
externals/utf8proc vendored

@ -1 +1 @@
Subproject commit ec0daa50bbedc36a0bada4a0f713eb9dc317d444
Subproject commit 40e605959eb5cb90b2587fa88e3b661558fbc55a

View file

@ -34,7 +34,13 @@ static void ts_lexer__get_chunk(Lexer *self) {
static void ts_lexer__get_lookahead(Lexer *self) {
uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start;
const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
uint32_t size = self->chunk_size - position_in_chunk + 1;
uint32_t size = self->chunk_size - position_in_chunk;
if (size == 0) {
self->lookahead_size = 1;
self->data.lookahead = '\0';
return;
}
if (self->input.encoding == TSInputEncodingUTF8) {
int64_t lookahead_size = utf8proc_iterate(chunk, size, &self->data.lookahead);

View file

@ -1,4 +1,5 @@
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdbool.h>
#include <string.h>
@ -468,13 +469,15 @@ const TSExternalTokenState *ts_tree_last_external_token_state(const Tree *tree)
static size_t ts_tree__write_char_to_string(char *s, size_t n, int32_t c) {
if (c == 0)
return snprintf(s, n, "EOF");
if (c == -1)
return snprintf(s, n, "INVALID");
else if (c == '\n')
return snprintf(s, n, "'\\n'");
else if (c == '\t')
return snprintf(s, n, "'\\t'");
else if (c == '\r')
return snprintf(s, n, "'\\r'");
else if (c < 128)
else if (0 < c && c < 128 && isprint(c))
return snprintf(s, n, "'%c'", c);
else
return snprintf(s, n, "%d", c);

View file

@ -187,6 +187,19 @@ describe("Parser", [&]() {
AssertThat(ts_node_end_point(error), Equals<TSPoint>({2, 2}));
});
});
// Regression test: the entire input is the single byte 0xDF, which is the lead
// byte of a two-byte UTF-8 sequence whose continuation byte is missing.
it("handles invalid UTF8 characters at EOF", [&]() {
// Exactly one heap byte with no NUL terminator — presumably so a memory
// checker can flag any read past the end of the input (TODO confirm against
// how the spec suite is run).
char *string = (char *)malloc(1);
string[0] = '\xdf';
ts_document_set_language(document, load_real_language("javascript"));
// The input length (1) is passed explicitly alongside the pointer.
ts_document_set_input_string_with_length(document, string, 1);
ts_document_parse(document);
free(string);
// The truncated sequence is expected to show up as an INVALID character
// inside an ERROR node at the root.
assert_root_node("(ERROR (UNEXPECTED INVALID))");
});
});
describe("handling extra tokens", [&]() {