Fix OOB reads at ends of chunks

Signed-off-by: Philip Turnbull <philipturnbull@github.com>
This commit is contained in:
Max Brunsfeld 2017-06-23 12:09:16 -07:00 committed by Philip Turnbull
parent 8ee3f96960
commit f62ee5a0f3
2 changed files with 20 additions and 1 deletion

View file

@ -34,7 +34,13 @@ static void ts_lexer__get_chunk(Lexer *self) {
static void ts_lexer__get_lookahead(Lexer *self) {
uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start;
const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
uint32_t size = self->chunk_size - position_in_chunk + 1;
uint32_t size = self->chunk_size - position_in_chunk;
if (size == 0) {
self->lookahead_size = 1;
self->data.lookahead = '\0';
return;
}
if (self->input.encoding == TSInputEncodingUTF8) {
int64_t lookahead_size = utf8proc_iterate(chunk, size, &self->data.lookahead);

View file

@ -187,6 +187,19 @@ describe("Parser", [&]() {
AssertThat(ts_node_end_point(error), Equals<TSPoint>({2, 2}));
});
});
it("handles invalid UTF8 characters at EOF", [&]() {
  ts_document_set_language(document, load_real_language("javascript"));

  // The whole input is the single byte 0xDF. In UTF-8 that byte is the lead
  // byte of a two-byte sequence, so on its own it is a truncated (invalid)
  // sequence sitting right at the end of the input.
  //
  // The buffer is exactly one byte of heap memory with no terminator after
  // it. NOTE(review): presumably heap allocation is used so that memory
  // checkers can catch any read past the end of the buffer -- confirm.
  char *input = (char *)malloc(1);
  *input = '\xdf';

  ts_document_set_input_string_with_length(document, input, 1);
  ts_document_parse(document);

  // The buffer is released before the resulting tree is inspected.
  free(input);

  assert_root_node("(ERROR (UNEXPECTED INVALID))");
});
});
describe("handling extra tokens", [&]() {