Blame - third_party/snappy/snappy_test_tool.cc - RealtimeRoboticsGroup/test

blob: a7c779bb7dd1148ba2dd702c1801843d2888fdff [file] [log] [blame]

James Kuszmaul	48dd4c8	2021-10-27 20:04:08 -0700	[diff] [blame]	1	// Copyright 2020 Google Inc. All Rights Reserved.
				2	//
				3	// Redistribution and use in source and binary forms, with or without
				4	// modification, are permitted provided that the following conditions are
				5	// met:
				6	//
				7	// * Redistributions of source code must retain the above copyright
				8	// notice, this list of conditions and the following disclaimer.
				9	// * Redistributions in binary form must reproduce the above
				10	// copyright notice, this list of conditions and the following disclaimer
				11	// in the documentation and/or other materials provided with the
				12	// distribution.
				13	// * Neither the name of Google Inc. nor the names of its
				14	// contributors may be used to endorse or promote products derived from
				15	// this software without specific prior written permission.
				16	//
				17	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				18	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				19	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				20	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				21	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				22	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				23	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				24	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				25	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				26	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				27	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				28
				29	#include <algorithm>
				30	#include <cmath>
				31	#include <cstdint>
				32	#include <cstdlib>
				33	#include <random>
				34	#include <string>
				35	#include <utility>
				36	#include <vector>
				37
				38	#include "snappy-test.h"
				39
				40	#include "snappy-internal.h"
				41	#include "snappy-sinksource.h"
				42	#include "snappy.h"
				43	#include "snappy_test_data.h"
				44
				45	SNAPPY_FLAG(int32_t, start_len, -1,
				46	"Starting prefix size for testing (-1: just full file contents)");
				47	SNAPPY_FLAG(int32_t, end_len, -1,
				48	"Starting prefix size for testing (-1: just full file contents)");
				49	SNAPPY_FLAG(int32_t, bytes, 10485760,
				50	"How many bytes to compress/uncompress per file for timing");
				51
				52	SNAPPY_FLAG(bool, zlib, true,
				53	"Run zlib compression (http://www.zlib.net)");
				54	SNAPPY_FLAG(bool, lzo, true,
				55	"Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
				56	SNAPPY_FLAG(bool, lz4, true,
				57	"Run LZ4 compression (https://github.com/lz4/lz4)");
				58	SNAPPY_FLAG(bool, snappy, true, "Run snappy compression");
				59
				60	SNAPPY_FLAG(bool, write_compressed, false,
				61	"Write compressed versions of each file to <file>.comp");
				62	SNAPPY_FLAG(bool, write_uncompressed, false,
				63	"Write uncompressed versions of each file to <file>.uncomp");
				64
				65	namespace snappy {
				66
				67	namespace {
				68
				69	#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
				70
				71	// To test against code that reads beyond its input, this class copies a
				72	// string to a newly allocated group of pages, the last of which
				73	// is made unreadable via mprotect. Note that we need to allocate the
				74	// memory with mmap(), as POSIX allows mprotect() only on memory allocated
				75	// with mmap(), and some malloc/posix_memalign implementations expect to
				76	// be able to read previously allocated memory while doing heap allocations.
				77	class DataEndingAtUnreadablePage {
				78	public:
				79	explicit DataEndingAtUnreadablePage(const std::string& s) {
				80	const size_t page_size = sysconf(_SC_PAGESIZE);
				81	const size_t size = s.size();
				82	// Round up space for string to a multiple of page_size.
				83	size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
				84	alloc_size_ = space_for_string + page_size;
				85	mem_ = mmap(NULL, alloc_size_,
				86	PROT_READ\|PROT_WRITE, MAP_PRIVATE\|MAP_ANONYMOUS, -1, 0);
				87	CHECK_NE(MAP_FAILED, mem_);
				88	protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
				89	char* dst = protected_page_ - size;
				90	std::memcpy(dst, s.data(), size);
				91	data_ = dst;
				92	size_ = size;
				93	// Make guard page unreadable.
				94	CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
				95	}
				96
				97	~DataEndingAtUnreadablePage() {
				98	const size_t page_size = sysconf(_SC_PAGESIZE);
				99	// Undo the mprotect.
				100	CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ\|PROT_WRITE));
				101	CHECK_EQ(0, munmap(mem_, alloc_size_));
				102	}
				103
				104	const char* data() const { return data_; }
				105	size_t size() const { return size_; }
				106
				107	private:
				108	size_t alloc_size_;
				109	void* mem_;
				110	char* protected_page_;
				111	const char* data_;
				112	size_t size_;
				113	};
				114
				115	#else // HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
				116
				117	// Fallback for systems without mmap.
				118	using DataEndingAtUnreadablePage = std::string;
				119
				120	#endif
				121
				122	enum CompressorType { ZLIB, LZO, LZ4, SNAPPY };
				123
				124	const char* names[] = {"ZLIB", "LZO", "LZ4", "SNAPPY"};
				125
				126	size_t MinimumRequiredOutputSpace(size_t input_size, CompressorType comp) {
				127	switch (comp) {
				128	#ifdef ZLIB_VERSION
				129	case ZLIB:
				130	return ZLib::MinCompressbufSize(input_size);
				131	#endif // ZLIB_VERSION
				132
				133	#ifdef LZO_VERSION
				134	case LZO:
				135	return input_size + input_size/64 + 16 + 3;
				136	#endif // LZO_VERSION
				137
				138	#ifdef LZ4_VERSION_NUMBER
				139	case LZ4:
				140	return LZ4_compressBound(input_size);
				141	#endif // LZ4_VERSION_NUMBER
				142
				143	case SNAPPY:
				144	return snappy::MaxCompressedLength(input_size);
				145
				146	default:
				147	LOG(FATAL) << "Unknown compression type number " << comp;
				148	return 0;
				149	}
				150	}
				151
				152	// Returns true if we successfully compressed, false otherwise.
				153	//
				154	// If compressed_is_preallocated is set, do not resize the compressed buffer.
				155	// This is typically what you want for a benchmark, in order to not spend
				156	// time in the memory allocator. If you do set this flag, however,
				157	// "compressed" must be preinitialized to at least MinCompressbufSize(comp)
				158	// number of bytes, and may contain junk bytes at the end after return.
				159	bool Compress(const char* input, size_t input_size, CompressorType comp,
				160	std::string* compressed, bool compressed_is_preallocated) {
				161	if (!compressed_is_preallocated) {
				162	compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
				163	}
				164
				165	switch (comp) {
				166	#ifdef ZLIB_VERSION
				167	case ZLIB: {
				168	ZLib zlib;
				169	uLongf destlen = compressed->size();
				170	int ret = zlib.Compress(
				171	reinterpret_cast<Bytef*>(string_as_array(compressed)),
				172	&destlen,
				173	reinterpret_cast<const Bytef*>(input),
				174	input_size);
				175	CHECK_EQ(Z_OK, ret);
				176	if (!compressed_is_preallocated) {
				177	compressed->resize(destlen);
				178	}
				179	return true;
				180	}
				181	#endif // ZLIB_VERSION
				182
				183	#ifdef LZO_VERSION
				184	case LZO: {
				185	unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
				186	lzo_uint destlen;
				187	int ret = lzo1x_1_15_compress(
				188	reinterpret_cast<const uint8_t*>(input),
				189	input_size,
				190	reinterpret_cast<uint8_t*>(string_as_array(compressed)),
				191	&destlen,
				192	mem);
				193	CHECK_EQ(LZO_E_OK, ret);
				194	delete[] mem;
				195	if (!compressed_is_preallocated) {
				196	compressed->resize(destlen);
				197	}
				198	break;
				199	}
				200	#endif // LZO_VERSION
				201
				202	#ifdef LZ4_VERSION_NUMBER
				203	case LZ4: {
				204	int destlen = compressed->size();
				205	destlen = LZ4_compress_default(input, string_as_array(compressed),
				206	input_size, destlen);
				207	CHECK_NE(destlen, 0);
				208	if (!compressed_is_preallocated) {
				209	compressed->resize(destlen);
				210	}
				211	break;
				212	}
				213	#endif // LZ4_VERSION_NUMBER
				214
				215	case SNAPPY: {
				216	size_t destlen;
				217	snappy::RawCompress(input, input_size,
				218	string_as_array(compressed),
				219	&destlen);
				220	CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
				221	if (!compressed_is_preallocated) {
				222	compressed->resize(destlen);
				223	}
				224	break;
				225	}
				226
				227	default: {
				228	return false; // the asked-for library wasn't compiled in
				229	}
				230	}
				231	return true;
				232	}
				233
				234	bool Uncompress(const std::string& compressed, CompressorType comp, int size,
				235	std::string* output) {
				236	// TODO: Switch to [[maybe_unused]] when we can assume C++17.
				237	(void)size;
				238	switch (comp) {
				239	#ifdef ZLIB_VERSION
				240	case ZLIB: {
				241	output->resize(size);
				242	ZLib zlib;
				243	uLongf destlen = output->size();
				244	int ret = zlib.Uncompress(
				245	reinterpret_cast<Bytef*>(string_as_array(output)),
				246	&destlen,
				247	reinterpret_cast<const Bytef*>(compressed.data()),
				248	compressed.size());
				249	CHECK_EQ(Z_OK, ret);
				250	CHECK_EQ(static_cast<uLongf>(size), destlen);
				251	break;
				252	}
				253	#endif // ZLIB_VERSION
				254
				255	#ifdef LZO_VERSION
				256	case LZO: {
				257	output->resize(size);
				258	lzo_uint destlen;
				259	int ret = lzo1x_decompress(
				260	reinterpret_cast<const uint8_t*>(compressed.data()),
				261	compressed.size(),
				262	reinterpret_cast<uint8_t*>(string_as_array(output)),
				263	&destlen,
				264	NULL);
				265	CHECK_EQ(LZO_E_OK, ret);
				266	CHECK_EQ(static_cast<lzo_uint>(size), destlen);
				267	break;
				268	}
				269	#endif // LZO_VERSION
				270
				271	#ifdef LZ4_VERSION_NUMBER
				272	case LZ4: {
				273	output->resize(size);
				274	int destlen = output->size();
				275	destlen = LZ4_decompress_safe(compressed.data(), string_as_array(output),
				276	compressed.size(), destlen);
				277	CHECK_NE(destlen, 0);
				278	CHECK_EQ(size, destlen);
				279	break;
				280	}
				281	#endif // LZ4_VERSION_NUMBER
				282	case SNAPPY: {
				283	snappy::RawUncompress(compressed.data(), compressed.size(),
				284	string_as_array(output));
				285	break;
				286	}
				287
				288	default: {
				289	return false; // the asked-for library wasn't compiled in
				290	}
				291	}
				292	return true;
				293	}
				294
				295	void Measure(const char* data, size_t length, CompressorType comp, int repeats,
				296	int block_size) {
				297	// Run tests a few time and pick median running times
				298	static const int kRuns = 5;
				299	double ctime[kRuns];
				300	double utime[kRuns];
				301	int compressed_size = 0;
				302
				303	{
				304	// Chop the input into blocks
				305	int num_blocks = (length + block_size - 1) / block_size;
				306	std::vector<const char*> input(num_blocks);
				307	std::vector<size_t> input_length(num_blocks);
				308	std::vector<std::string> compressed(num_blocks);
				309	std::vector<std::string> output(num_blocks);
				310	for (int b = 0; b < num_blocks; ++b) {
				311	int input_start = b * block_size;
				312	int input_limit = std::min<int>((b+1)*block_size, length);
				313	input[b] = data+input_start;
				314	input_length[b] = input_limit-input_start;
				315	}
				316
				317	// Pre-grow the output buffers so we don't measure string append time.
				318	for (std::string& compressed_block : compressed) {
				319	compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp));
				320	}
				321
				322	// First, try one trial compression to make sure the code is compiled in
				323	if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
				324	LOG(WARNING) << "Skipping " << names[comp] << ": "
				325	<< "library not compiled in";
				326	return;
				327	}
				328
				329	for (int run = 0; run < kRuns; ++run) {
				330	CycleTimer ctimer, utimer;
				331
				332	// Pre-grow the output buffers so we don't measure string append time.
				333	for (std::string& compressed_block : compressed) {
				334	compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp));
				335	}
				336
				337	ctimer.Start();
				338	for (int b = 0; b < num_blocks; ++b) {
				339	for (int i = 0; i < repeats; ++i)
				340	Compress(input[b], input_length[b], comp, &compressed[b], true);
				341	}
				342	ctimer.Stop();
				343
				344	// Compress once more, with resizing, so we don't leave junk
				345	// at the end that will confuse the decompressor.
				346	for (int b = 0; b < num_blocks; ++b) {
				347	Compress(input[b], input_length[b], comp, &compressed[b], false);
				348	}
				349
				350	for (int b = 0; b < num_blocks; ++b) {
				351	output[b].resize(input_length[b]);
				352	}
				353
				354	utimer.Start();
				355	for (int i = 0; i < repeats; ++i) {
				356	for (int b = 0; b < num_blocks; ++b)
				357	Uncompress(compressed[b], comp, input_length[b], &output[b]);
				358	}
				359	utimer.Stop();
				360
				361	ctime[run] = ctimer.Get();
				362	utime[run] = utimer.Get();
				363	}
				364
				365	compressed_size = 0;
				366	for (const std::string& compressed_item : compressed) {
				367	compressed_size += compressed_item.size();
				368	}
				369	}
				370
				371	std::sort(ctime, ctime + kRuns);
				372	std::sort(utime, utime + kRuns);
				373	const int med = kRuns/2;
				374
				375	float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
				376	float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
				377	std::string x = names[comp];
				378	x += ":";
				379	std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate)
				380	: std::string("?");
				381	std::printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%% "
				382	"comp %5.1f MB/s uncomp %5s MB/s\n",
				383	x.c_str(),
				384	block_size/(1<<20),
				385	static_cast<int>(length), static_cast<uint32_t>(compressed_size),
				386	(compressed_size * 100.0) / std::max<int>(1, length),
				387	comp_rate,
				388	urate.c_str());
				389	}
				390
				391	void CompressFile(const char* fname) {
				392	std::string fullinput;
				393	CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
				394
				395	std::string compressed;
				396	Compress(fullinput.data(), fullinput.size(), SNAPPY, &compressed, false);
				397
				398	CHECK_OK(file::SetContents(std::string(fname).append(".comp"), compressed,
				399	file::Defaults()));
				400	}
				401
				402	void UncompressFile(const char* fname) {
				403	std::string fullinput;
				404	CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
				405
				406	size_t uncompLength;
				407	CHECK(snappy::GetUncompressedLength(fullinput.data(), fullinput.size(),
				408	&uncompLength));
				409
				410	std::string uncompressed;
				411	uncompressed.resize(uncompLength);
				412	CHECK(snappy::Uncompress(fullinput.data(), fullinput.size(), &uncompressed));
				413
				414	CHECK_OK(file::SetContents(std::string(fname).append(".uncomp"), uncompressed,
				415	file::Defaults()));
				416	}
				417
				418	void MeasureFile(const char* fname) {
				419	std::string fullinput;
				420	CHECK_OK(file::GetContents(fname, &fullinput, file::Defaults()));
				421	std::printf("%-40s :\n", fname);
				422
				423	int start_len = (snappy::GetFlag(FLAGS_start_len) < 0)
				424	? fullinput.size()
				425	: snappy::GetFlag(FLAGS_start_len);
				426	int end_len = fullinput.size();
				427	if (snappy::GetFlag(FLAGS_end_len) >= 0) {
				428	end_len = std::min<int>(fullinput.size(), snappy::GetFlag(FLAGS_end_len));
				429	}
				430	for (int len = start_len; len <= end_len; ++len) {
				431	const char* const input = fullinput.data();
				432	int repeats = (snappy::GetFlag(FLAGS_bytes) + len) / (len + 1);
				433	if (snappy::GetFlag(FLAGS_zlib))
				434	Measure(input, len, ZLIB, repeats, 1024 << 10);
				435	if (snappy::GetFlag(FLAGS_lzo))
				436	Measure(input, len, LZO, repeats, 1024 << 10);
				437	if (snappy::GetFlag(FLAGS_lz4))
				438	Measure(input, len, LZ4, repeats, 1024 << 10);
				439	if (snappy::GetFlag(FLAGS_snappy))
				440	Measure(input, len, SNAPPY, repeats, 4096 << 10);
				441
				442	// For block-size based measurements
				443	if (0 && snappy::GetFlag(FLAGS_snappy)) {
				444	Measure(input, len, SNAPPY, repeats, 8<<10);
				445	Measure(input, len, SNAPPY, repeats, 16<<10);
				446	Measure(input, len, SNAPPY, repeats, 32<<10);
				447	Measure(input, len, SNAPPY, repeats, 64<<10);
				448	Measure(input, len, SNAPPY, repeats, 256<<10);
				449	Measure(input, len, SNAPPY, repeats, 1024<<10);
				450	}
				451	}
				452	}
				453
				454	} // namespace
				455
				456	} // namespace snappy
				457
				458	int main(int argc, char** argv) {
				459	InitGoogle(argv[0], &argc, &argv, true);
				460
				461	for (int arg = 1; arg < argc; ++arg) {
				462	if (snappy::GetFlag(FLAGS_write_compressed)) {
				463	snappy::CompressFile(argv[arg]);
				464	} else if (snappy::GetFlag(FLAGS_write_uncompressed)) {
				465	snappy::UncompressFile(argv[arg]);
				466	} else {
				467	snappy::MeasureFile(argv[arg]);
				468	}
				469	}
				470	return 0;
				471	}