Merge remote-tracking branch 'origin/check-utf8proc_iterate-return' into update-fixture-grammars

This commit is contained in:
Max Brunsfeld 2017-03-21 09:59:35 -07:00
commit 63fb041961
2 changed files with 20 additions and 4 deletions

View file

@@ -36,11 +36,17 @@ static void ts_lexer__get_lookahead(Lexer *self) {
const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
uint32_t size = self->chunk_size - position_in_chunk + 1;
if (self->input.encoding == TSInputEncodingUTF8)
self->lookahead_size =
utf8proc_iterate(chunk, size, &self->data.lookahead);
else
if (self->input.encoding == TSInputEncodingUTF8) {
int64_t lookahead_size = utf8proc_iterate(chunk, size, &self->data.lookahead);
if (lookahead_size < 0) {
self->lookahead_size = 1;
} else {
self->lookahead_size = lookahead_size;
}
}
else {
self->lookahead_size = utf16_iterate(chunk, size, &self->data.lookahead);
}
}
static void ts_lexer__advance(void *payload, bool skip) {

View file

@@ -473,6 +473,16 @@ describe("Parser", [&]() {
AssertThat(ts_node_end_char(root), Equals(strlen("'OOO - DD';")));
AssertThat(ts_node_end_byte(root), Equals(strlen("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';")));
});
// Spec: parses a JavaScript input containing bytes that do not form valid
// UTF-8 (0xEB is a multi-byte lead byte that is not followed by a continuation
// byte, and 0x83 is a continuation byte with no lead byte before it), then
// checks that the root node's end byte equals the byte length of the input.
it("handles non-UTF8 characters", [&]() {
// ts_document_set_logger(document, stderr_logger_new(true));
// NOTE(review): presumably turns on debugging-graph output for this document;
// this looks like leftover debugging next to the disabled logger call above —
// confirm it is meant to stay in the spec.
ts_document_print_debugging_graphs(document, true);
ts_document_set_language(document, load_real_language("javascript"));
// `\x00e` is a single hex escape (value 0x0E) because a hex escape consumes
// every following hex digit, so the literal holds no embedded NUL and the
// strlen() in the assertion below measures the whole string.
ts_document_set_input_string(document, "cons\xeb\x00e=ls\x83l6hi');\x0a");
ts_document_parse(document);
// NOTE(review): `root` is not assigned in the visible lines after the parse;
// confirm it refers to the root node of the tree produced just above.
AssertThat(ts_node_end_byte(root), Equals(strlen("cons\xeb\x00e=ls\x83l6hi');\x0a")));
});
});
});