SpyInput uses a fixed-size buffer and explicitly zeroes its memory, which is
good for catching logic errors but defeats valgrind's memory tracking. Use a
separate buffer of exactly the correct size for each request instead. With this
change, valgrind correctly catches the problem:
```
==8694== Invalid read of size 2
==8694== at 0x54EFFB: utf16_iterate (utf16.c:10)
==8694== by 0x551126: ts_lexer__get_lookahead (lexer.c:54)
==8694== by 0x5515CD: ts_lexer_start (lexer.c:154)
==8694== by 0x54699F: parser(long,...)(long long) (parser.c:297)
==8694== by 0x54788A: parser__get_lookahead (parser.c:439)
==8694== by 0x54B2D3: parser__advance (parser.c:1150)
==8694== by 0x54C2AA: parser_parse (parser.c:1348)
==8694== by 0x53F063: ts_document_parse_with_options (document.c:136)
==8694== by 0x53EF43: ts_document_parse (document.c:107)
==8694== by 0x4AED11: {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}::operator()() const (document_test.cc:82)
==8694== by 0x4B56B6: std::_Function_handler<void (), {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}>::_M_invoke(std::_Any_data const&) (functional:1871)
==8694== by 0x40F8C5: std::function<void ()>::operator()() const (functional:2267)
==8694== Address 0x5d08be0 is 0 bytes inside a block of size 1 alloc'd
==8694== at 0x4C2E80F: operator new[](unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==8694== by 0x507C3E: SpyInput::read(void*, unsigned int*) (spy_input.cc:66)
==8694== by 0x55103D: ts_lexer__get_chunk (lexer.c:29)
==8694== by 0x5515B6: ts_lexer_start (lexer.c:152)
==8694== by 0x54699F: parser(long,...)(long long) (parser.c:297)
==8694== by 0x54788A: parser__get_lookahead (parser.c:439)
==8694== by 0x54B2D3: parser__advance (parser.c:1150)
==8694== by 0x54C2AA: parser_parse (parser.c:1348)
==8694== by 0x53F063: ts_document_parse_with_options (document.c:136)
==8694== by 0x53EF43: ts_document_parse (document.c:107)
==8694== by 0x4AED11: {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}::operator()() const (document_test.cc:82)
==8694== by 0x4B56B6: std::_Function_handler<void (), {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}>::_M_invoke(std::_Any_data const&) (functional:1871)
```
160 lines
4.6 KiB
C++
160 lines
4.6 KiB
C++
#include "helpers/spy_input.h"
#include "helpers/encoding_helpers.h"
#include <assert.h>
#include <string.h>
#include <algorithm>
#include <utility>
|
|
|
|
using std::pair;
|
|
using std::string;
|
|
using std::vector;
|
|
|
|
// Presumably the maximum number of bytes in one UTF-8 encoded character.
// NOTE(review): nothing in the visible part of this file references this
// constant — confirm whether it is still needed.
static const size_t UTF8_MAX_CHAR_SIZE = 4;
|
|
|
|
/*
 * Creates a spy over `content`.
 *
 * `chars_per_chunk` is stored and later forwarded by read() to
 * string_byte_for_character() to decide how many bytes each chunk holds. The
 * spy starts at byte offset 0, with UTF-8 encoding, no chunk buffer allocated
 * and no byte ranges recorded.
 */
SpyInput::SpyInput(string content, size_t chars_per_chunk) :
  chars_per_chunk(chars_per_chunk),
  buffer(nullptr),
  byte_offset(0),
  // `content` is a by-value sink parameter: move it into the member rather
  // than copying the string a second time.
  content(std::move(content)),
  encoding(TSInputEncodingUTF8),
  ranges_read({}) {}
|
|
|
|
/* Releases the chunk buffer allocated by the most recent read(), if any. */
SpyInput::~SpyInput() {
  // `buffer` is either null or a `new char[]` allocation; delete[] on a null
  // pointer is a no-op, so no guard is needed.
  delete[] this->buffer;
}
|
|
|
|
/*
 * Records that `count` bytes starting at byte `start` were read.
 *
 * The first recorded range (in insertion order) that contains `start`, or
 * ends exactly at `start`, is extended so that it reaches `start + count`.
 * If no such range exists, a new [start, start + count) entry is appended.
 *
 * Existing ranges are never merged with one another, and a read that begins
 * before an existing range always creates a new entry, even if it overlaps.
 */
static void add_byte_range(vector<pair<uint32_t, uint32_t>> *ranges,
                           uint32_t start, uint32_t count) {
  const uint32_t end = start + count;
  for (auto &range : *ranges) {
    const bool starts_within = range.first <= start && start <= range.second;
    if (!starts_within) continue;

    // `range.first <= start` holds here, so the lower bound can never move;
    // only the upper bound may need to grow.
    if (end > range.second) range.second = end;
    return;
  }
  ranges->push_back({start, end});
}
|
|
|
|
const char * SpyInput::read(void *payload, uint32_t *bytes_read) {
|
|
auto spy = static_cast<SpyInput *>(payload);
|
|
|
|
if (spy->byte_offset > spy->content.size()) {
|
|
*bytes_read = 0;
|
|
return "";
|
|
}
|
|
|
|
long byte_count = string_byte_for_character(spy->encoding, spy->content, spy->byte_offset, spy->chars_per_chunk);
|
|
if (byte_count < 0)
|
|
byte_count = spy->content.size() - spy->byte_offset;
|
|
|
|
string result = spy->content.substr(spy->byte_offset, byte_count);
|
|
*bytes_read = byte_count;
|
|
add_byte_range(&spy->ranges_read, spy->byte_offset, byte_count);
|
|
spy->byte_offset += byte_count;
|
|
|
|
/*
|
|
* This class stores its entire `content` in a contiguous buffer, but we want
|
|
* to ensure that the code under test cannot accidentally read more than
|
|
* `*bytes_read` bytes past the returned pointer. To make sure that this type
|
|
* of error does not fly, we allocate a separate buffer for each request and
|
|
* return a reference to that buffer, rather than a pointer into the main
|
|
* content. The temporary buffer only fits `*bytes_read` bytes so valgrind
|
|
* can detect code reading too many bytes from the buffer.
|
|
*/
|
|
delete[] spy->buffer;
|
|
if (byte_count) {
|
|
spy->buffer = new char[byte_count];
|
|
memcpy(spy->buffer, result.data(), byte_count);
|
|
} else {
|
|
spy->buffer = nullptr;
|
|
}
|
|
|
|
return spy->buffer;
|
|
}
|
|
|
|
int SpyInput::seek(void *payload, uint32_t character, uint32_t byte) {
|
|
auto spy = static_cast<SpyInput *>(payload);
|
|
spy->byte_offset = byte;
|
|
return 0;
|
|
}
|
|
|
|
vector<string> SpyInput::strings_read() const {
|
|
vector<string> result;
|
|
for (auto &range : ranges_read) {
|
|
result.push_back(content.substr(range.first, range.second - range.first));
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/*
 * Builds a TSInput whose payload is this spy and whose callbacks are
 * SpyInput::read and SpyInput::seek. The spy's current encoding is copied
 * into the result, and columns are reported in bytes.
 */
TSInput SpyInput::input() {
  TSInput ts_input;
  ts_input.payload = this;
  ts_input.read = &SpyInput::read;
  ts_input.seek = &SpyInput::seek;
  ts_input.encoding = encoding;
  ts_input.measure_columns_in_bytes = true;
  return ts_input;
}
|
|
|
|
/*
 * Measures `text` as a row/column extent.
 *
 * `row` is the number of '\n' bytes in `text`; `column` is the number of
 * bytes that follow the last '\n' (or the whole length when there is none).
 * Columns are counted in bytes, not in characters.
 *
 * The text is only read, so it is taken by const reference to avoid copying
 * the string on every call.
 */
static TSPoint get_extent(const string &text) {
  TSPoint extent = {0, 0};
  for (const char byte : text) {
    if (byte == '\n') {
      extent.row++;
      extent.column = 0;
    } else {
      extent.column++;
    }
  }
  return extent;
}
|
|
|
|
/*
 * Replaces `bytes_removed` bytes of the content, starting at `start_byte`,
 * with `text`.
 *
 * An entry describing how to reverse the change is pushed onto the undo
 * stack. Returns a TSInputEdit describing the edit: the byte offsets and
 * counts, the start point, and the extents of the removed and the inserted
 * text.
 */
TSInputEdit SpyInput::replace(size_t start_byte, size_t bytes_removed, string text) {
  auto swapped = swap_substr(start_byte, bytes_removed, text);
  const string &removed_text = swapped.first;
  size_t bytes_added = text.size();

  // To undo this edit later, `bytes_added` bytes must be removed again and
  // the removed text put back.
  undo_stack.push_back(SpyInputEdit{start_byte, bytes_added, removed_text});

  TSInputEdit edit = {};
  edit.start_byte = start_byte;
  edit.bytes_removed = bytes_removed;
  edit.bytes_added = bytes_added;
  edit.start_point = swapped.second;
  edit.extent_removed = get_extent(removed_text);
  edit.extent_added = get_extent(text);
  return edit;
}
|
|
|
|
/*
 * Reverses the most recent edit that is still on the undo stack.
 *
 * Pops the top undo entry, removes the bytes that the edit added and
 * re-inserts the text it removed. Returns a TSInputEdit describing this
 * reverse edit.
 *
 * The undo stack must not be empty: calling undo() with nothing to undo is a
 * bug in the calling test and trips the assertion below.
 */
TSInputEdit SpyInput::undo() {
  // back()/pop_back() on an empty vector is undefined behaviour; fail loudly.
  assert(!undo_stack.empty());

  SpyInputEdit entry = undo_stack.back();
  undo_stack.pop_back();

  auto swap = swap_substr(entry.start_byte, entry.bytes_removed, entry.text_inserted);

  // Value-initialize, as replace() does, so no field is left indeterminate.
  TSInputEdit result = {};
  result.start_byte = entry.start_byte;
  result.bytes_removed = entry.bytes_removed;
  result.bytes_added = entry.text_inserted.size();
  result.start_point = swap.second;
  result.extent_removed = get_extent(swap.first);
  result.extent_added = get_extent(entry.text_inserted);
  return result;
}
|
|
|
|
pair<string, TSPoint> SpyInput::swap_substr(size_t start_byte, size_t bytes_removed, string text) {
|
|
TSPoint start_position = {0, 0};
|
|
for (auto i = content.begin(), n = content.begin() + start_byte; i < n; i++) {
|
|
if (*i == '\n') {
|
|
start_position.row++;
|
|
start_position.column = 0;
|
|
} else {
|
|
start_position.column++;
|
|
}
|
|
}
|
|
|
|
string text_removed = content.substr(start_byte, bytes_removed);
|
|
content.erase(start_byte, bytes_removed);
|
|
content.insert(start_byte, text);
|
|
|
|
return {text_removed, start_position};
|
|
}
|
|
|
|
void SpyInput::clear() {
|
|
ranges_read.clear();
|
|
}
|