Merge pull request #92 from tree-sitter/utf16-oob
Add test for UTF-16 out-of-bounds read
This commit is contained in:
commit
10d28d4b56
4 changed files with 27 additions and 7 deletions
|
|
@ -1,6 +1,11 @@
|
|||
#include "runtime/utf16.h"
|
||||
|
||||
int utf16_iterate(const uint8_t *string, size_t length, int32_t *code_point) {
|
||||
if (length < 2) {
|
||||
*code_point = -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint16_t *units = (uint16_t *)string;
|
||||
uint16_t unit = units[0];
|
||||
|
||||
|
|
|
|||
|
|
@ -12,8 +12,7 @@ static const size_t UTF8_MAX_CHAR_SIZE = 4;
|
|||
|
||||
SpyInput::SpyInput(string content, size_t chars_per_chunk) :
|
||||
chars_per_chunk(chars_per_chunk),
|
||||
buffer_size(UTF8_MAX_CHAR_SIZE * chars_per_chunk),
|
||||
buffer(new char[buffer_size]),
|
||||
buffer(nullptr),
|
||||
byte_offset(0),
|
||||
content(content),
|
||||
encoding(TSInputEncodingUTF8),
|
||||
|
|
@ -57,12 +56,19 @@ const char * SpyInput::read(void *payload, uint32_t *bytes_read) {
|
|||
* This class stores its entire `content` in a contiguous buffer, but we want
|
||||
* to ensure that the code under test cannot accidentally read more than
|
||||
* `*bytes_read` bytes past the returned pointer. To make sure that this type
|
||||
* of error does not fly, we copy the chunk into a zeroed-out buffer and
|
||||
* of error does not fly, we allocate a separate buffer for each request and
|
||||
* return a reference to that buffer, rather than a pointer into the main
|
||||
* content.
|
||||
* content. The temporary buffer only fits `*bytes_read` bytes so valgrind
|
||||
* can detect code reading too many bytes from the buffer.
|
||||
*/
|
||||
memset(spy->buffer, 0, spy->buffer_size);
|
||||
memcpy(spy->buffer, result.data(), byte_count);
|
||||
delete[] spy->buffer;
|
||||
if (byte_count) {
|
||||
spy->buffer = new char[byte_count];
|
||||
memcpy(spy->buffer, result.data(), byte_count);
|
||||
} else {
|
||||
spy->buffer = nullptr;
|
||||
}
|
||||
|
||||
return spy->buffer;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ struct SpyInputEdit {
|
|||
|
||||
class SpyInput {
|
||||
uint32_t chars_per_chunk;
|
||||
uint32_t buffer_size;
|
||||
char *buffer;
|
||||
uint32_t byte_offset;
|
||||
std::vector<SpyInputEdit> undo_stack;
|
||||
|
|
|
|||
|
|
@ -74,6 +74,16 @@ describe("Document", [&]() {
|
|||
"(array (true) (false))");
|
||||
});
|
||||
|
||||
it("handles truncated UTF16 data", [&]() {
|
||||
const char content[1] = { '\0' };
|
||||
spy_input->content = string(content, sizeof(content));
|
||||
spy_input->encoding = TSInputEncodingUTF16;
|
||||
|
||||
ts_document_set_input(document, spy_input->input());
|
||||
ts_document_invalidate(document);
|
||||
ts_document_parse(document);
|
||||
});
|
||||
|
||||
it("allows columns to be measured in either bytes or characters", [&]() {
|
||||
const char16_t content[] = u"[true, false]";
|
||||
spy_input->content = string((const char *)content, sizeof(content));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue