Make the empty chunk 2 bytes long, for UTF16 support

This commit is contained in:
Max Brunsfeld 2018-06-25 17:46:23 -07:00
parent 4f5a87b952
commit 80cab8fd8a
2 changed files with 25 additions and 3 deletions

View file

@ -14,7 +14,7 @@
// Log `message` followed by the given character via LOG. Values below 255
// are formatted with %c (as a quoted character); all other values are
// formatted with %d (as a number).
#define LOG_CHARACTER(message, character) \
LOG(character < 255 ? message " character:'%c'" : message " character:%d", character)
static const char empty_chunk[2] = { 0, 0 };
static const char empty_chunk[3] = { 0, 0 };
static void ts_lexer__get_chunk(Lexer *self) {
self->chunk_start = self->current_position.bytes;
@ -186,7 +186,7 @@ static void ts_lexer_goto(Lexer *self, Length position) {
};
self->chunk = empty_chunk;
self->chunk_start = position.bytes;
self->chunk_size = 1;
self->chunk_size = 2;
}
self->token_start_position = position;

View file

@ -769,7 +769,7 @@ describe("Parser", [&]() {
});
});
describe("set_skipped_ranges", [&]() {
describe("set_included_ranges()", [&]() {
it("can parse code within a single range of a document", [&]() {
string source_code = "<span>hi</span><script>console.log('sup');</script>";
@ -874,6 +874,28 @@ describe("Parser", [&]() {
Equals<TSPoint>({0, static_cast<uint32_t>(source_code.find("<b>"))})
);
});
it("can handle errors at the ends of the nested UTF16 documents (regression)", [&]() {
  // Regression test: parse only the `a.` portion of a UTF-16 encoded document
  // via a single included range, and expect the incomplete expression to be
  // reported as an ERROR node wrapping the identifier.
  u16string source_code = u"<script>a.</script>";

  // The range is expressed in bytes. The source is ASCII-only, so every
  // UTF-16 code unit occupies two bytes and byte offset == 2 * code-unit index.
  const uint32_t start_byte = 2u * static_cast<uint32_t>(source_code.find(u"a."));
  const uint32_t end_byte = 2u * static_cast<uint32_t>(source_code.find(u"</script"));

  // Everything is on row 0, so the point columns equal the byte offsets.
  TSRange included_range = {
    {0, start_byte},
    {0, end_byte},
    start_byte,
    end_byte,
  };

  // NOTE(review): this enables logging to stderr for the test; it looks like
  // leftover debugging output — confirm whether it should stay.
  ts_parser_set_logger(parser, stderr_logger_new(true));
  ts_parser_set_included_ranges(parser, &included_range, 1);
  ts_parser_set_language(parser, load_real_language("javascript"));

  // NOTE(review): the second argument is presumably the chunk size in bytes;
  // an odd size would split UTF-16 code units across chunks — confirm against
  // SpyInput's constructor.
  SpyInput input("", 3);

  // Hand the raw UTF-16 bytes to the input (two bytes per code unit).
  input.content.assign(
    reinterpret_cast<const char *>(source_code.c_str()),
    source_code.size() * 2
  );
  input.encoding = TSInputEncodingUTF16;

  tree = ts_parser_parse(parser, nullptr, input.input());
  assert_root_node("(program (ERROR (identifier)))");
});
});
});