Merge remote-tracking branch 'origin/check-utf8proc_iterate-return' into update-fixture-grammars
This commit is contained in:
commit
63fb041961
2 changed files with 20 additions and 4 deletions
|
|
@@ -36,11 +36,17 @@ static void ts_lexer__get_lookahead(Lexer *self) {
|
|||
const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
|
||||
uint32_t size = self->chunk_size - position_in_chunk + 1;
|
||||
|
||||
if (self->input.encoding == TSInputEncodingUTF8)
|
||||
self->lookahead_size =
|
||||
utf8proc_iterate(chunk, size, &self->data.lookahead);
|
||||
else
|
||||
if (self->input.encoding == TSInputEncodingUTF8) {
|
||||
int64_t lookahead_size = utf8proc_iterate(chunk, size, &self->data.lookahead);
|
||||
if (lookahead_size < 0) {
|
||||
self->lookahead_size = 1;
|
||||
} else {
|
||||
self->lookahead_size = lookahead_size;
|
||||
}
|
||||
}
|
||||
else {
|
||||
self->lookahead_size = utf16_iterate(chunk, size, &self->data.lookahead);
|
||||
}
|
||||
}
|
||||
|
||||
static void ts_lexer__advance(void *payload, bool skip) {
|
||||
|
|
|
|||
|
|
@@ -473,6 +473,16 @@ describe("Parser", [&]() {
|
|||
AssertThat(ts_node_end_char(root), Equals(strlen("'OOO - DD';")));
|
||||
AssertThat(ts_node_end_byte(root), Equals(strlen("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';")));
|
||||
});
|
||||
|
||||
it("handles non-UTF8 characters", [&]() {
|
||||
// ts_document_set_logger(document, stderr_logger_new(true));
|
||||
ts_document_print_debugging_graphs(document, true);
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
ts_document_set_input_string(document, "cons\xeb\x00e=ls\x83l6hi');\x0a");
|
||||
ts_document_parse(document);
|
||||
|
||||
AssertThat(ts_node_end_byte(root), Equals(strlen("cons\xeb\x00e=ls\x83l6hi');\x0a")));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue