Fix out-of-bounds (OOB) reads at ends of chunks
Signed-off-by: Philip Turnbull <philipturnbull@github.com>
This commit is contained in:
parent
8ee3f96960
commit
f62ee5a0f3
2 changed files with 20 additions and 1 deletion
|
|
@ -34,7 +34,13 @@ static void ts_lexer__get_chunk(Lexer *self) {
|
|||
static void ts_lexer__get_lookahead(Lexer *self) {
|
||||
uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start;
|
||||
const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
|
||||
uint32_t size = self->chunk_size - position_in_chunk + 1;
|
||||
uint32_t size = self->chunk_size - position_in_chunk;
|
||||
|
||||
if (size == 0) {
|
||||
self->lookahead_size = 1;
|
||||
self->data.lookahead = '\0';
|
||||
return;
|
||||
}
|
||||
|
||||
if (self->input.encoding == TSInputEncodingUTF8) {
|
||||
int64_t lookahead_size = utf8proc_iterate(chunk, size, &self->data.lookahead);
|
||||
|
|
|
|||
|
|
@ -187,6 +187,19 @@ describe("Parser", [&]() {
|
|||
AssertThat(ts_node_end_point(error), Equals<TSPoint>({2, 2}));
|
||||
});
|
||||
});
|
||||
|
||||
it("handles invalid UTF8 characters at EOF", [&]() {
|
||||
char *string = (char *)malloc(1);
|
||||
string[0] = '\xdf';
|
||||
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
ts_document_set_input_string_with_length(document, string, 1);
|
||||
ts_document_parse(document);
|
||||
|
||||
free(string);
|
||||
|
||||
assert_root_node("(ERROR (UNEXPECTED INVALID))");
|
||||
});
|
||||
});
|
||||
|
||||
describe("handling extra tokens", [&]() {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue