From 80cab8fd8a9c0e2d421c5c6a456bcbb9e2497319 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 25 Jun 2018 17:46:23 -0700 Subject: [PATCH] Make the empty chunk 2 bytes long, for UTF16 support --- src/runtime/lexer.c | 4 ++-- test/runtime/parser_test.cc | 24 +++++++++++++++++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index 2443fb1a..ef8c9e52 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -14,7 +14,7 @@ #define LOG_CHARACTER(message, character) \ LOG(character < 255 ? message " character:'%c'" : message " character:%d", character) -static const char empty_chunk[2] = { 0, 0 }; +static const char empty_chunk[3] = { 0, 0 }; static void ts_lexer__get_chunk(Lexer *self) { self->chunk_start = self->current_position.bytes; @@ -186,7 +186,7 @@ static void ts_lexer_goto(Lexer *self, Length position) { }; self->chunk = empty_chunk; self->chunk_start = position.bytes; - self->chunk_size = 1; + self->chunk_size = 2; } self->token_start_position = position; diff --git a/test/runtime/parser_test.cc b/test/runtime/parser_test.cc index fd2697da..182be18b 100644 --- a/test/runtime/parser_test.cc +++ b/test/runtime/parser_test.cc @@ -769,7 +769,7 @@ describe("Parser", [&]() { }); }); - describe("set_skipped_ranges", [&]() { + describe("set_included_ranges()", [&]() { it("can parse code within a single range of a document", [&]() { string source_code = "hi"; @@ -874,6 +874,28 @@ describe("Parser", [&]() { Equals({0, static_cast(source_code.find(""))}) ); }); + + it("can handle errors at the ends of the nested UTF16 documents (regression)", [&]() { + u16string source_code = u""; + + TSRange included_range = { + {0, static_cast(2u * source_code.find(u"a."))}, + {0, static_cast(2u * source_code.find(u"(source_code.find(u"a.")), + 2u * static_cast(source_code.find(u"