Merge pull request #92 from tree-sitter/utf16-oob

Add test for UTF16 out-of-bounds read
This commit is contained in:
Max Brunsfeld 2017-07-18 17:24:31 -07:00 committed by GitHub
commit 10d28d4b56
4 changed files with 27 additions and 7 deletions

View file

@ -1,6 +1,11 @@
#include "runtime/utf16.h"
int utf16_iterate(const uint8_t *string, size_t length, int32_t *code_point) {
if (length < 2) {
*code_point = -1;
return 0;
}
uint16_t *units = (uint16_t *)string;
uint16_t unit = units[0];

View file

@ -12,8 +12,7 @@ static const size_t UTF8_MAX_CHAR_SIZE = 4;
SpyInput::SpyInput(string content, size_t chars_per_chunk) :
chars_per_chunk(chars_per_chunk),
buffer_size(UTF8_MAX_CHAR_SIZE * chars_per_chunk),
buffer(new char[buffer_size]),
buffer(nullptr),
byte_offset(0),
content(content),
encoding(TSInputEncodingUTF8),
@ -57,12 +56,19 @@ const char * SpyInput::read(void *payload, uint32_t *bytes_read) {
* This class stores its entire `content` in a contiguous buffer, but we want
* to ensure that the code under test cannot accidentally read more than
* `*bytes_read` bytes past the returned pointer. To make sure that this type
* of error does not fly, we copy the chunk into a zeroed-out buffer and
* of error does not fly, we allocate a separate buffer for each request and
* return a reference to that buffer, rather than a pointer into the main
* content.
* content. The temporary buffer only fits `*bytes_read` bytes so valgrind
* can detect code reading too many bytes from the buffer.
*/
memset(spy->buffer, 0, spy->buffer_size);
memcpy(spy->buffer, result.data(), byte_count);
delete[] spy->buffer;
if (byte_count) {
spy->buffer = new char[byte_count];
memcpy(spy->buffer, result.data(), byte_count);
} else {
spy->buffer = nullptr;
}
return spy->buffer;
}

View file

@ -13,7 +13,6 @@ struct SpyInputEdit {
class SpyInput {
uint32_t chars_per_chunk;
uint32_t buffer_size;
char *buffer;
uint32_t byte_offset;
std::vector<SpyInputEdit> undo_stack;

View file

@ -74,6 +74,16 @@ describe("Document", [&]() {
"(array (true) (false))");
});
it("handles truncated UTF16 data", [&]() {
  // A lone byte is shorter than one two-byte UTF16 code unit, so the input
  // ends partway through a code unit.
  const char lone_byte[] = { '\0' };
  const auto lone_byte_size = sizeof(lone_byte);
  spy_input->content = string(lone_byte, lone_byte_size);
  spy_input->encoding = TSInputEncodingUTF16;

  // There is no explicit expectation: the parse only has to run to completion
  // on the truncated input. NOTE(review): presumably meant to be run under a
  // memory checker so an out-of-bounds read is reported - confirm.
  ts_document_set_input(document, spy_input->input());
  ts_document_invalidate(document);
  ts_document_parse(document);
});
it("allows columns to be measured in either bytes or characters", [&]() {
const char16_t content[] = u"[true, false]";
spy_input->content = string((const char *)content, sizeof(content));