tree-sitter/test/helpers/spy_input.cc

161 lines
4.6 KiB
C++
Raw Normal View History

#include "helpers/spy_input.h"
#include "helpers/encoding_helpers.h"
2014-09-26 16:21:09 -07:00
#include <string.h>
#include <algorithm>
2015-09-19 14:46:14 -07:00
#include <assert.h>
2016-09-13 13:08:52 -07:00
using std::pair;
using std::string;
using std::vector;
2015-09-19 13:29:30 -07:00
// Upper bound, in bytes, on the encoded size of one UTF-8 character.
// NOTE(review): not referenced by any code visible in this file section.
static constexpr size_t UTF8_MAX_CHAR_SIZE = 4;
2015-07-16 17:32:19 -07:00
SpyInput::SpyInput(string content, size_t chars_per_chunk) :
2015-09-19 13:29:30 -07:00
chars_per_chunk(chars_per_chunk),
Rework SpyInput buffer handling SpyInput uses a fixed-size buffer and explicitly zeros memory which is good for catching logic errors but defeats valgrind's memory tracking. Use a separate buffer of exactly the correct size for each request. This correctly catches the problem under valgrind: ``` ==8694== Invalid read of size 2 ==8694== at 0x54EFFB: utf16_iterate (utf16.c:10) ==8694== by 0x551126: ts_lexer__get_lookahead (lexer.c:54) ==8694== by 0x5515CD: ts_lexer_start (lexer.c:154) ==8694== by 0x54699F: parser(long,...)(long long) (parser.c:297) ==8694== by 0x54788A: parser__get_lookahead (parser.c:439) ==8694== by 0x54B2D3: parser__advance (parser.c:1150) ==8694== by 0x54C2AA: parser_parse (parser.c:1348) ==8694== by 0x53F063: ts_document_parse_with_options (document.c:136) ==8694== by 0x53EF43: ts_document_parse (document.c:107) ==8694== by 0x4AED11: {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}::operator()() const (document_test.cc:82) ==8694== by 0x4B56B6: std::_Function_handler<void (), {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}>::_M_invoke(std::_Any_data const&) (functional:1871) ==8694== by 0x40F8C5: std::function<void ()>::operator()() const (functional:2267) ==8694== Address 0x5d08be0 is 0 bytes inside a block of size 1 alloc'd ==8694== at 0x4C2E80F: operator new[](unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==8694== by 0x507C3E: SpyInput::read(void*, unsigned int*) (spy_input.cc:66) ==8694== by 0x55103D: ts_lexer__get_chunk (lexer.c:29) ==8694== by 0x5515B6: ts_lexer_start (lexer.c:152) ==8694== by 0x54699F: parser(long,...)(long long) (parser.c:297) ==8694== by 0x54788A: parser__get_lookahead (parser.c:439) ==8694== by 0x54B2D3: parser__advance (parser.c:1150) ==8694== by 0x54C2AA: parser_parse (parser.c:1348) ==8694== by 0x53F063: ts_document_parse_with_options (document.c:136) ==8694== 
by 0x53EF43: ts_document_parse (document.c:107) ==8694== by 0x4AED11: {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}::operator()() const (document_test.cc:82) ==8694== by 0x4B56B6: std::_Function_handler<void (), {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}>::_M_invoke(std::_Any_data const&) (functional:1871) ```
2017-07-18 11:54:42 -07:00
buffer(nullptr),
2015-09-19 13:29:30 -07:00
byte_offset(0),
content(content),
encoding(TSInputEncodingUTF8),
ranges_read({}) {}
2015-07-16 17:32:19 -07:00
// Releases the chunk buffer allocated by the most recent read(), if any.
// `delete[]` on a null pointer is a no-op, so a spy that was never read from
// is handled as well.
SpyInput::~SpyInput() {
  delete[] buffer;
}
// Records that the bytes [start, start + count) were read.
//
// `ranges` holds (first, second) pairs where `second` is one past the last
// byte. If the new read begins inside an existing range, or exactly at its
// end, that range is extended to cover the new read; only the first such
// range is considered. Otherwise the read is appended as a new range. A read
// that begins before an existing range is never merged into it, even if the
// two overlap.
static void add_byte_range(vector<pair<uint32_t, uint32_t>> *ranges,
                           uint32_t start, uint32_t count) {
  const uint32_t end = start + count;
  for (auto &range : *ranges) {
    if (range.first <= start && start <= range.second) {
      // `start` is already within the range, so only the upper bound can grow.
      range.second = std::max(range.second, end);
      return;
    }
  }
  ranges->push_back({start, end});
}
const char * SpyInput::read(void *payload, uint32_t *bytes_read) {
2015-09-19 13:29:30 -07:00
auto spy = static_cast<SpyInput *>(payload);
if (spy->byte_offset > spy->content.size()) {
*bytes_read = 0;
return "";
}
long byte_count = string_byte_for_character(spy->encoding, spy->content, spy->byte_offset, spy->chars_per_chunk);
2015-07-16 17:29:16 -07:00
if (byte_count < 0)
2015-09-19 13:29:30 -07:00
byte_count = spy->content.size() - spy->byte_offset;
2015-07-16 17:29:16 -07:00
string result = spy->content.substr(spy->byte_offset, byte_count);
2015-07-16 17:29:16 -07:00
*bytes_read = byte_count;
add_byte_range(&spy->ranges_read, spy->byte_offset, byte_count);
2015-09-19 13:29:30 -07:00
spy->byte_offset += byte_count;
2015-07-16 17:29:16 -07:00
/*
* This class stores its entire `content` in a contiguous buffer, but we want
* to ensure that the code under test cannot accidentally read more than
* `*bytes_read` bytes past the returned pointer. To make sure that this type
Rework SpyInput buffer handling SpyInput uses a fixed-size buffer and explicitly zeros memory which is good for catching logic errors but defeats valgrind's memory tracking. Use a separate buffer of exactly the correct size for each request. This correctly catches the problem under valgrind: ``` ==8694== Invalid read of size 2 ==8694== at 0x54EFFB: utf16_iterate (utf16.c:10) ==8694== by 0x551126: ts_lexer__get_lookahead (lexer.c:54) ==8694== by 0x5515CD: ts_lexer_start (lexer.c:154) ==8694== by 0x54699F: parser(long,...)(long long) (parser.c:297) ==8694== by 0x54788A: parser__get_lookahead (parser.c:439) ==8694== by 0x54B2D3: parser__advance (parser.c:1150) ==8694== by 0x54C2AA: parser_parse (parser.c:1348) ==8694== by 0x53F063: ts_document_parse_with_options (document.c:136) ==8694== by 0x53EF43: ts_document_parse (document.c:107) ==8694== by 0x4AED11: {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}::operator()() const (document_test.cc:82) ==8694== by 0x4B56B6: std::_Function_handler<void (), {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}>::_M_invoke(std::_Any_data const&) (functional:1871) ==8694== by 0x40F8C5: std::function<void ()>::operator()() const (functional:2267) ==8694== Address 0x5d08be0 is 0 bytes inside a block of size 1 alloc'd ==8694== at 0x4C2E80F: operator new[](unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==8694== by 0x507C3E: SpyInput::read(void*, unsigned int*) (spy_input.cc:66) ==8694== by 0x55103D: ts_lexer__get_chunk (lexer.c:29) ==8694== by 0x5515B6: ts_lexer_start (lexer.c:152) ==8694== by 0x54699F: parser(long,...)(long long) (parser.c:297) ==8694== by 0x54788A: parser__get_lookahead (parser.c:439) ==8694== by 0x54B2D3: parser__advance (parser.c:1150) ==8694== by 0x54C2AA: parser_parse (parser.c:1348) ==8694== by 0x53F063: ts_document_parse_with_options (document.c:136) ==8694== 
by 0x53EF43: ts_document_parse (document.c:107) ==8694== by 0x4AED11: {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}::operator()() const (document_test.cc:82) ==8694== by 0x4B56B6: std::_Function_handler<void (), {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}>::_M_invoke(std::_Any_data const&) (functional:1871) ```
2017-07-18 11:54:42 -07:00
* of error does not fly, we allocate a separate buffer for each request and
2015-07-16 17:29:16 -07:00
* return a reference to that buffer, rather than a pointer into the main
Rework SpyInput buffer handling SpyInput uses a fixed-size buffer and explicitly zeros memory which is good for catching logic errors but defeats valgrind's memory tracking. Use a separate buffer of exactly the correct size for each request. This correctly catches the problem under valgrind: ``` ==8694== Invalid read of size 2 ==8694== at 0x54EFFB: utf16_iterate (utf16.c:10) ==8694== by 0x551126: ts_lexer__get_lookahead (lexer.c:54) ==8694== by 0x5515CD: ts_lexer_start (lexer.c:154) ==8694== by 0x54699F: parser(long,...)(long long) (parser.c:297) ==8694== by 0x54788A: parser__get_lookahead (parser.c:439) ==8694== by 0x54B2D3: parser__advance (parser.c:1150) ==8694== by 0x54C2AA: parser_parse (parser.c:1348) ==8694== by 0x53F063: ts_document_parse_with_options (document.c:136) ==8694== by 0x53EF43: ts_document_parse (document.c:107) ==8694== by 0x4AED11: {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}::operator()() const (document_test.cc:82) ==8694== by 0x4B56B6: std::_Function_handler<void (), {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}>::_M_invoke(std::_Any_data const&) (functional:1871) ==8694== by 0x40F8C5: std::function<void ()>::operator()() const (functional:2267) ==8694== Address 0x5d08be0 is 0 bytes inside a block of size 1 alloc'd ==8694== at 0x4C2E80F: operator new[](unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==8694== by 0x507C3E: SpyInput::read(void*, unsigned int*) (spy_input.cc:66) ==8694== by 0x55103D: ts_lexer__get_chunk (lexer.c:29) ==8694== by 0x5515B6: ts_lexer_start (lexer.c:152) ==8694== by 0x54699F: parser(long,...)(long long) (parser.c:297) ==8694== by 0x54788A: parser__get_lookahead (parser.c:439) ==8694== by 0x54B2D3: parser__advance (parser.c:1150) ==8694== by 0x54C2AA: parser_parse (parser.c:1348) ==8694== by 0x53F063: ts_document_parse_with_options (document.c:136) ==8694== 
by 0x53EF43: ts_document_parse (document.c:107) ==8694== by 0x4AED11: {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}::operator()() const (document_test.cc:82) ==8694== by 0x4B56B6: std::_Function_handler<void (), {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}>::_M_invoke(std::_Any_data const&) (functional:1871) ```
2017-07-18 11:54:42 -07:00
* content. The temporary buffer only fits `*bytes_read` bytes so valgrind
* can detect code reading too many bytes from the buffer.
2015-07-16 17:29:16 -07:00
*/
Rework SpyInput buffer handling SpyInput uses a fixed-size buffer and explicitly zeros memory which is good for catching logic errors but defeats valgrind's memory tracking. Use a separate buffer of exactly the correct size for each request. This correctly catches the problem under valgrind: ``` ==8694== Invalid read of size 2 ==8694== at 0x54EFFB: utf16_iterate (utf16.c:10) ==8694== by 0x551126: ts_lexer__get_lookahead (lexer.c:54) ==8694== by 0x5515CD: ts_lexer_start (lexer.c:154) ==8694== by 0x54699F: parser(long,...)(long long) (parser.c:297) ==8694== by 0x54788A: parser__get_lookahead (parser.c:439) ==8694== by 0x54B2D3: parser__advance (parser.c:1150) ==8694== by 0x54C2AA: parser_parse (parser.c:1348) ==8694== by 0x53F063: ts_document_parse_with_options (document.c:136) ==8694== by 0x53EF43: ts_document_parse (document.c:107) ==8694== by 0x4AED11: {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}::operator()() const (document_test.cc:82) ==8694== by 0x4B56B6: std::_Function_handler<void (), {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}>::_M_invoke(std::_Any_data const&) (functional:1871) ==8694== by 0x40F8C5: std::function<void ()>::operator()() const (functional:2267) ==8694== Address 0x5d08be0 is 0 bytes inside a block of size 1 alloc'd ==8694== at 0x4C2E80F: operator new[](unsigned long) (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so) ==8694== by 0x507C3E: SpyInput::read(void*, unsigned int*) (spy_input.cc:66) ==8694== by 0x55103D: ts_lexer__get_chunk (lexer.c:29) ==8694== by 0x5515B6: ts_lexer_start (lexer.c:152) ==8694== by 0x54699F: parser(long,...)(long long) (parser.c:297) ==8694== by 0x54788A: parser__get_lookahead (parser.c:439) ==8694== by 0x54B2D3: parser__advance (parser.c:1150) ==8694== by 0x54C2AA: parser_parse (parser.c:1348) ==8694== by 0x53F063: ts_document_parse_with_options (document.c:136) ==8694== 
by 0x53EF43: ts_document_parse (document.c:107) ==8694== by 0x4AED11: {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}::operator()() const (document_test.cc:82) ==8694== by 0x4B56B6: std::_Function_handler<void (), {lambda()#1}::operator()() const::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda()#4}>::_M_invoke(std::_Any_data const&) (functional:1871) ```
2017-07-18 11:54:42 -07:00
delete[] spy->buffer;
if (byte_count) {
spy->buffer = new char[byte_count];
memcpy(spy->buffer, result.data(), byte_count);
} else {
spy->buffer = nullptr;
}
2015-09-19 13:29:30 -07:00
return spy->buffer;
2014-10-03 14:19:04 -07:00
}
int SpyInput::seek(void *payload, uint32_t character, uint32_t byte) {
2015-09-19 13:29:30 -07:00
auto spy = static_cast<SpyInput *>(payload);
spy->byte_offset = byte;
2014-10-03 14:19:04 -07:00
return 0;
}
// Returns the text covered by each recorded read range, in the order the
// ranges were recorded. The text is taken from the current `content`, so it
// reflects any edits made after the reads took place.
vector<string> SpyInput::strings_read() const {
  vector<string> strings;
  strings.reserve(ranges_read.size());
  for (const auto &byte_range : ranges_read) {
    const auto length = byte_range.second - byte_range.first;
    strings.push_back(content.substr(byte_range.first, length));
  }
  return strings;
}
2015-07-16 17:32:19 -07:00
// Builds the TSInput that routes reads and seeks to this spy. The returned
// struct stores `this` as its payload, so it must not be used after the spy
// has been destroyed.
TSInput SpyInput::input() {
  TSInput ts_input;
  ts_input.payload = this;
  ts_input.read = read;
  ts_input.seek = seek;
  ts_input.encoding = encoding;
  ts_input.measure_columns_in_bytes = true;
  return ts_input;
}
2016-09-13 13:08:52 -07:00
// Measures `text` as a (row, column) extent: `row` is the number of '\n'
// bytes in the text and `column` is the number of bytes after the last one
// (or in the whole text when it has no newline).
static TSPoint get_extent(string text) {
  TSPoint extent = {0, 0};
  for (const char byte : text) {
    if (byte != '\n') {
      extent.column++;
      continue;
    }
    extent.row++;
    extent.column = 0;
  }
  return extent;
}
// Replaces `bytes_removed` bytes of the content, starting at `start_byte`,
// with `text`. The information needed to reverse the change is pushed onto
// the undo stack, and a TSInputEdit describing the change is returned.
TSInputEdit SpyInput::replace(size_t start_byte, size_t bytes_removed, string text) {
  // `swapped.first` is the text that was removed; `swapped.second` is the
  // row/column position of `start_byte`.
  auto swapped = swap_substr(start_byte, bytes_removed, text);
  size_t bytes_added = text.size();

  // Undoing this edit means removing the `bytes_added` bytes just inserted
  // and putting the removed text back.
  undo_stack.push_back(SpyInputEdit{start_byte, bytes_added, swapped.first});

  TSInputEdit result = {};
  result.start_byte = start_byte;
  result.bytes_added = bytes_added;
  result.bytes_removed = bytes_removed;
  result.start_point = swapped.second;
  result.extent_removed = get_extent(swapped.first);
  result.extent_added = get_extent(text);
  return result;
}
// Reverses the most recent edit that is still on the undo stack and returns
// a TSInputEdit describing the reversal. The stack must not be empty.
TSInputEdit SpyInput::undo() {
  // back()/pop_back() on an empty container is undefined behaviour, so fail
  // loudly instead.
  assert(!undo_stack.empty());
  SpyInputEdit entry = undo_stack.back();
  undo_stack.pop_back();

  // `swapped.first` is the text removed by the reversal; `swapped.second` is
  // the row/column position of the edit's start byte.
  auto swapped = swap_substr(entry.start_byte, entry.bytes_removed, entry.text_inserted);

  // Zero-initialized so that any field not assigned below has a defined value.
  TSInputEdit result = {};
  result.start_byte = entry.start_byte;
  result.bytes_removed = entry.bytes_removed;
  result.bytes_added = entry.text_inserted.size();
  result.start_point = swapped.second;
  result.extent_removed = get_extent(swapped.first);
  result.extent_added = get_extent(entry.text_inserted);
  return result;
}
2016-09-13 13:08:52 -07:00
pair<string, TSPoint> SpyInput::swap_substr(size_t start_byte, size_t bytes_removed, string text) {
TSPoint start_position = {0, 0};
for (auto i = content.begin(), n = content.begin() + start_byte; i < n; i++) {
if (*i == '\n') {
start_position.row++;
start_position.column = 0;
} else {
start_position.column++;
}
}
2015-09-19 14:46:14 -07:00
string text_removed = content.substr(start_byte, bytes_removed);
content.erase(start_byte, bytes_removed);
content.insert(start_byte, text);
2016-09-13 13:08:52 -07:00
return {text_removed, start_position};
}
2015-07-16 17:32:19 -07:00
void SpyInput::clear() {
ranges_read.clear();
}