Blame - snappy_unittest.cc - RealtimeRoboticsGroup/test

blob: 292004ca84443d4117e9e690da352e80dfd7499f [file] [log] [blame]

James Kuszmaul	48dd4c8	2021-10-27 20:04:08 -0700	[diff] [blame^]	1	// Copyright 2005 and onwards Google Inc.
				2	//
				3	// Redistribution and use in source and binary forms, with or without
				4	// modification, are permitted provided that the following conditions are
				5	// met:
				6	//
				7	// * Redistributions of source code must retain the above copyright
				8	// notice, this list of conditions and the following disclaimer.
				9	// * Redistributions in binary form must reproduce the above
				10	// copyright notice, this list of conditions and the following disclaimer
				11	// in the documentation and/or other materials provided with the
				12	// distribution.
				13	// * Neither the name of Google Inc. nor the names of its
				14	// contributors may be used to endorse or promote products derived from
				15	// this software without specific prior written permission.
				16	//
				17	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				18	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				19	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				20	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				21	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				22	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				23	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				24	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				25	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				26	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				27	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				28
				29	#include <algorithm>
				30	#include <cmath>
				31	#include <cstdlib>
				32	#include <random>
				33	#include <string>
				34	#include <utility>
				35	#include <vector>
				36
				37	#include "snappy-test.h"
				38
				39	#include "gtest/gtest.h"
				40
				41	#include "snappy-internal.h"
				42	#include "snappy-sinksource.h"
				43	#include "snappy.h"
				44	#include "snappy_test_data.h"
				45
				46	SNAPPY_FLAG(bool, snappy_dump_decompression_table, false,
				47	"If true, we print the decompression table during tests.");
				48
				49	namespace snappy {
				50
				51	namespace {
				52
				53	#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
				54
				55	// To test against code that reads beyond its input, this class copies a
				56	// string to a newly allocated group of pages, the last of which
				57	// is made unreadable via mprotect. Note that we need to allocate the
				58	// memory with mmap(), as POSIX allows mprotect() only on memory allocated
				59	// with mmap(), and some malloc/posix_memalign implementations expect to
				60	// be able to read previously allocated memory while doing heap allocations.
				61	class DataEndingAtUnreadablePage {
				62	public:
				63	explicit DataEndingAtUnreadablePage(const std::string& s) {
				64	const size_t page_size = sysconf(_SC_PAGESIZE);
				65	const size_t size = s.size();
				66	// Round up space for string to a multiple of page_size.
				67	size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
				68	alloc_size_ = space_for_string + page_size;
				69	mem_ = mmap(NULL, alloc_size_,
				70	PROT_READ\|PROT_WRITE, MAP_PRIVATE\|MAP_ANONYMOUS, -1, 0);
				71	CHECK_NE(MAP_FAILED, mem_);
				72	protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
				73	char* dst = protected_page_ - size;
				74	std::memcpy(dst, s.data(), size);
				75	data_ = dst;
				76	size_ = size;
				77	// Make guard page unreadable.
				78	CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
				79	}
				80
				81	~DataEndingAtUnreadablePage() {
				82	const size_t page_size = sysconf(_SC_PAGESIZE);
				83	// Undo the mprotect.
				84	CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ\|PROT_WRITE));
				85	CHECK_EQ(0, munmap(mem_, alloc_size_));
				86	}
				87
				88	const char* data() const { return data_; }
				89	size_t size() const { return size_; }
				90
				91	private:
				92	size_t alloc_size_;
				93	void* mem_;
				94	char* protected_page_;
				95	const char* data_;
				96	size_t size_;
				97	};
				98
				99	#else // HAVE_FUNC_MMAP) && HAVE_FUNC_SYSCONF
				100
				101	// Fallback for systems without mmap.
				102	using DataEndingAtUnreadablePage = std::string;
				103
				104	#endif
				105
				106	int VerifyString(const std::string& input) {
				107	std::string compressed;
				108	DataEndingAtUnreadablePage i(input);
				109	const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
				110	CHECK_EQ(written, compressed.size());
				111	CHECK_LE(compressed.size(),
				112	snappy::MaxCompressedLength(input.size()));
				113	CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
				114
				115	std::string uncompressed;
				116	DataEndingAtUnreadablePage c(compressed);
				117	CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
				118	CHECK_EQ(uncompressed, input);
				119	return uncompressed.size();
				120	}
				121
				122	void VerifyStringSink(const std::string& input) {
				123	std::string compressed;
				124	DataEndingAtUnreadablePage i(input);
				125	const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
				126	CHECK_EQ(written, compressed.size());
				127	CHECK_LE(compressed.size(),
				128	snappy::MaxCompressedLength(input.size()));
				129	CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
				130
				131	std::string uncompressed;
				132	uncompressed.resize(input.size());
				133	snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
				134	DataEndingAtUnreadablePage c(compressed);
				135	snappy::ByteArraySource source(c.data(), c.size());
				136	CHECK(snappy::Uncompress(&source, &sink));
				137	CHECK_EQ(uncompressed, input);
				138	}
				139
				140	void VerifyIOVec(const std::string& input) {
				141	std::string compressed;
				142	DataEndingAtUnreadablePage i(input);
				143	const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
				144	CHECK_EQ(written, compressed.size());
				145	CHECK_LE(compressed.size(),
				146	snappy::MaxCompressedLength(input.size()));
				147	CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
				148
				149	// Try uncompressing into an iovec containing a random number of entries
				150	// ranging from 1 to 10.
				151	char* buf = new char[input.size()];
				152	std::minstd_rand0 rng(input.size());
				153	std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
				154	size_t num = uniform_1_to_10(rng);
				155	if (input.size() < num) {
				156	num = input.size();
				157	}
				158	struct iovec* iov = new iovec[num];
				159	size_t used_so_far = 0;
				160	std::bernoulli_distribution one_in_five(1.0 / 5);
				161	for (size_t i = 0; i < num; ++i) {
				162	assert(used_so_far < input.size());
				163	iov[i].iov_base = buf + used_so_far;
				164	if (i == num - 1) {
				165	iov[i].iov_len = input.size() - used_so_far;
				166	} else {
				167	// Randomly choose to insert a 0 byte entry.
				168	if (one_in_five(rng)) {
				169	iov[i].iov_len = 0;
				170	} else {
				171	std::uniform_int_distribution<size_t> uniform_not_used_so_far(
				172	0, input.size() - used_so_far - 1);
				173	iov[i].iov_len = uniform_not_used_so_far(rng);
				174	}
				175	}
				176	used_so_far += iov[i].iov_len;
				177	}
				178	CHECK(snappy::RawUncompressToIOVec(
				179	compressed.data(), compressed.size(), iov, num));
				180	CHECK(!memcmp(buf, input.data(), input.size()));
				181	delete[] iov;
				182	delete[] buf;
				183	}
				184
				185	// Test that data compressed by a compressor that does not
				186	// obey block sizes is uncompressed properly.
				187	void VerifyNonBlockedCompression(const std::string& input) {
				188	if (input.length() > snappy::kBlockSize) {
				189	// We cannot test larger blocks than the maximum block size, obviously.
				190	return;
				191	}
				192
				193	std::string prefix;
				194	Varint::Append32(&prefix, input.size());
				195
				196	// Setup compression table
				197	snappy::internal::WorkingMemory wmem(input.size());
				198	int table_size;
				199	uint16_t* table = wmem.GetHashTable(input.size(), &table_size);
				200
				201	// Compress entire input in one shot
				202	std::string compressed;
				203	compressed += prefix;
				204	compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
				205	char* dest = string_as_array(&compressed) + prefix.size();
				206	char* end = snappy::internal::CompressFragment(input.data(), input.size(),
				207	dest, table, table_size);
				208	compressed.resize(end - compressed.data());
				209
				210	// Uncompress into std::string
				211	std::string uncomp_str;
				212	CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
				213	CHECK_EQ(uncomp_str, input);
				214
				215	// Uncompress using source/sink
				216	std::string uncomp_str2;
				217	uncomp_str2.resize(input.size());
				218	snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
				219	snappy::ByteArraySource source(compressed.data(), compressed.size());
				220	CHECK(snappy::Uncompress(&source, &sink));
				221	CHECK_EQ(uncomp_str2, input);
				222
				223	// Uncompress into iovec
				224	{
				225	static const int kNumBlocks = 10;
				226	struct iovec vec[kNumBlocks];
				227	const int block_size = 1 + input.size() / kNumBlocks;
				228	std::string iovec_data(block_size * kNumBlocks, 'x');
				229	for (int i = 0; i < kNumBlocks; ++i) {
				230	vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
				231	vec[i].iov_len = block_size;
				232	}
				233	CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
				234	vec, kNumBlocks));
				235	CHECK_EQ(std::string(iovec_data.data(), input.size()), input);
				236	}
				237	}
				238
				239	// Expand the input so that it is at least K times as big as block size
				240	std::string Expand(const std::string& input) {
				241	static const int K = 3;
				242	std::string data = input;
				243	while (data.size() < K * snappy::kBlockSize) {
				244	data += input;
				245	}
				246	return data;
				247	}
				248
				249	int Verify(const std::string& input) {
				250	VLOG(1) << "Verifying input of size " << input.size();
				251
				252	// Compress using string based routines
				253	const int result = VerifyString(input);
				254
				255	// Verify using sink based routines
				256	VerifyStringSink(input);
				257
				258	VerifyNonBlockedCompression(input);
				259	VerifyIOVec(input);
				260	if (!input.empty()) {
				261	const std::string expanded = Expand(input);
				262	VerifyNonBlockedCompression(expanded);
				263	VerifyIOVec(input);
				264	}
				265
				266	return result;
				267	}
				268
				269	bool IsValidCompressedBuffer(const std::string& c) {
				270	return snappy::IsValidCompressedBuffer(c.data(), c.size());
				271	}
				272	bool Uncompress(const std::string& c, std::string* u) {
				273	return snappy::Uncompress(c.data(), c.size(), u);
				274	}
				275
				276	// This test checks to ensure that snappy doesn't coredump if it gets
				277	// corrupted data.
				278	TEST(CorruptedTest, VerifyCorrupted) {
				279	std::string source = "making sure we don't crash with corrupted input";
				280	VLOG(1) << source;
				281	std::string dest;
				282	std::string uncmp;
				283	snappy::Compress(source.data(), source.size(), &dest);
				284
				285	// Mess around with the data. It's hard to simulate all possible
				286	// corruptions; this is just one example ...
				287	CHECK_GT(dest.size(), 3);
				288	dest[1]--;
				289	dest[3]++;
				290	// this really ought to fail.
				291	CHECK(!IsValidCompressedBuffer(dest));
				292	CHECK(!Uncompress(dest, &uncmp));
				293
				294	// This is testing for a security bug - a buffer that decompresses to 100k
				295	// but we lie in the snappy header and only reserve 0 bytes of memory :)
				296	source.resize(100000);
				297	for (char& source_char : source) {
				298	source_char = 'A';
				299	}
				300	snappy::Compress(source.data(), source.size(), &dest);
				301	dest[0] = dest[1] = dest[2] = dest[3] = 0;
				302	CHECK(!IsValidCompressedBuffer(dest));
				303	CHECK(!Uncompress(dest, &uncmp));
				304
				305	if (sizeof(void *) == 4) {
				306	// Another security check; check a crazy big length can't DoS us with an
				307	// over-allocation.
				308	// Currently this is done only for 32-bit builds. On 64-bit builds,
				309	// where 3 GB might be an acceptable allocation size, Uncompress()
				310	// attempts to decompress, and sometimes causes the test to run out of
				311	// memory.
				312	dest[0] = dest[1] = dest[2] = dest[3] = '\xff';
				313	// This decodes to a really large size, i.e., about 3 GB.
				314	dest[4] = 'k';
				315	CHECK(!IsValidCompressedBuffer(dest));
				316	CHECK(!Uncompress(dest, &uncmp));
				317	} else {
				318	LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
				319	}
				320
				321	// This decodes to about 2 MB; much smaller, but should still fail.
				322	dest[0] = dest[1] = dest[2] = '\xff';
				323	dest[3] = 0x00;
				324	CHECK(!IsValidCompressedBuffer(dest));
				325	CHECK(!Uncompress(dest, &uncmp));
				326
				327	// try reading stuff in from a bad file.
				328	for (int i = 1; i <= 3; ++i) {
				329	std::string data =
				330	ReadTestDataFile(StrFormat("baddata%d.snappy", i).c_str(), 0);
				331	std::string uncmp;
				332	// check that we don't return a crazy length
				333	size_t ulen;
				334	CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
				335	\|\| (ulen < (1<<20)));
				336	uint32_t ulen2;
				337	snappy::ByteArraySource source(data.data(), data.size());
				338	CHECK(!snappy::GetUncompressedLength(&source, &ulen2) \|\|
				339	(ulen2 < (1<<20)));
				340	CHECK(!IsValidCompressedBuffer(data));
				341	CHECK(!Uncompress(data, &uncmp));
				342	}
				343	}
				344
				345	// Helper routines to construct arbitrary compressed strings.
				346	// These mirror the compression code in snappy.cc, but are copied
				347	// here so that we can bypass some limitations in the how snappy.cc
				348	// invokes these routines.
				349	void AppendLiteral(std::string* dst, const std::string& literal) {
				350	if (literal.empty()) return;
				351	int n = literal.size() - 1;
				352	if (n < 60) {
				353	// Fit length in tag byte
				354	dst->push_back(0 \| (n << 2));
				355	} else {
				356	// Encode in upcoming bytes
				357	char number[4];
				358	int count = 0;
				359	while (n > 0) {
				360	number[count++] = n & 0xff;
				361	n >>= 8;
				362	}
				363	dst->push_back(0 \| ((59+count) << 2));
				364	*dst += std::string(number, count);
				365	}
				366	*dst += literal;
				367	}
				368
				369	void AppendCopy(std::string* dst, int offset, int length) {
				370	while (length > 0) {
				371	// Figure out how much to copy in one shot
				372	int to_copy;
				373	if (length >= 68) {
				374	to_copy = 64;
				375	} else if (length > 64) {
				376	to_copy = 60;
				377	} else {
				378	to_copy = length;
				379	}
				380	length -= to_copy;
				381
				382	if ((to_copy >= 4) && (to_copy < 12) && (offset < 2048)) {
				383	assert(to_copy-4 < 8); // Must fit in 3 bits
				384	dst->push_back(1 \| ((to_copy-4) << 2) \| ((offset >> 8) << 5));
				385	dst->push_back(offset & 0xff);
				386	} else if (offset < 65536) {
				387	dst->push_back(2 \| ((to_copy-1) << 2));
				388	dst->push_back(offset & 0xff);
				389	dst->push_back(offset >> 8);
				390	} else {
				391	dst->push_back(3 \| ((to_copy-1) << 2));
				392	dst->push_back(offset & 0xff);
				393	dst->push_back((offset >> 8) & 0xff);
				394	dst->push_back((offset >> 16) & 0xff);
				395	dst->push_back((offset >> 24) & 0xff);
				396	}
				397	}
				398	}
				399
				400	TEST(Snappy, SimpleTests) {
				401	Verify("");
				402	Verify("a");
				403	Verify("ab");
				404	Verify("abc");
				405
				406	Verify("aaaaaaa" + std::string(16, 'b') + std::string("aaaaa") + "abc");
				407	Verify("aaaaaaa" + std::string(256, 'b') + std::string("aaaaa") + "abc");
				408	Verify("aaaaaaa" + std::string(2047, 'b') + std::string("aaaaa") + "abc");
				409	Verify("aaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
				410	Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
				411	}
				412
				413	// Regression test for cr/345340892.
				414	TEST(Snappy, AppendSelfPatternExtensionEdgeCases) {
				415	Verify("abcabcabcabcabcabcab");
				416	Verify("abcabcabcabcabcabcab0123456789ABCDEF");
				417
				418	Verify("abcabcabcabcabcabcabcabcabcabcabcabc");
				419	Verify("abcabcabcabcabcabcabcabcabcabcabcabc0123456789ABCDEF");
				420	}
				421
				422	// Regression test for cr/345340892.
				423	TEST(Snappy, AppendSelfPatternExtensionEdgeCasesExhaustive) {
				424	std::mt19937 rng;
				425	std::uniform_int_distribution<int> uniform_byte(0, 255);
				426	for (int pattern_size = 1; pattern_size <= 18; ++pattern_size) {
				427	for (int length = 1; length <= 64; ++length) {
				428	for (int extra_bytes_after_pattern : {0, 1, 15, 16, 128}) {
				429	const int size = pattern_size + length + extra_bytes_after_pattern;
				430	std::string input;
				431	input.resize(size);
				432	for (int i = 0; i < pattern_size; ++i) {
				433	input[i] = 'a' + i;
				434	}
				435	for (int i = 0; i < length; ++i) {
				436	input[pattern_size + i] = input[i];
				437	}
				438	for (int i = 0; i < extra_bytes_after_pattern; ++i) {
				439	input[pattern_size + length + i] =
				440	static_cast<char>(uniform_byte(rng));
				441	}
				442	Verify(input);
				443	}
				444	}
				445	}
				446	}
				447
				448	// Verify max blowup (lots of four-byte copies)
				449	TEST(Snappy, MaxBlowup) {
				450	std::mt19937 rng;
				451	std::uniform_int_distribution<int> uniform_byte(0, 255);
				452	std::string input;
				453	for (int i = 0; i < 80000; ++i)
				454	input.push_back(static_cast<char>(uniform_byte(rng)));
				455
				456	for (int i = 0; i < 80000; i += 4) {
				457	std::string four_bytes(input.end() - i - 4, input.end() - i);
				458	input.append(four_bytes);
				459	}
				460	Verify(input);
				461	}
				462
				463	TEST(Snappy, RandomData) {
				464	std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
				465	std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
				466	std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
				467	std::uniform_int_distribution<int> uniform_byte(0, 255);
				468	std::uniform_int_distribution<size_t> uniform_4k(0, 4095);
				469	std::uniform_int_distribution<size_t> uniform_64k(0, 65535);
				470	std::bernoulli_distribution one_in_ten(1.0 / 10);
				471
				472	constexpr int num_ops = 20000;
				473	for (int i = 0; i < num_ops; ++i) {
				474	if ((i % 1000) == 0) {
				475	VLOG(0) << "Random op " << i << " of " << num_ops;
				476	}
				477
				478	std::string x;
				479	size_t len = uniform_4k(rng);
				480	if (i < 100) {
				481	len = 65536 + uniform_64k(rng);
				482	}
				483	while (x.size() < len) {
				484	int run_len = 1;
				485	if (one_in_ten(rng)) {
				486	int skewed_bits = uniform_0_to_8(rng);
				487	// int is guaranteed to hold at least 16 bits, this uses at most 8 bits.
				488	std::uniform_int_distribution<int> skewed_low(0,
				489	(1 << skewed_bits) - 1);
				490	run_len = skewed_low(rng);
				491	}
				492	char c = static_cast<char>(uniform_byte(rng));
				493	if (i >= 100) {
				494	int skewed_bits = uniform_0_to_3(rng);
				495	// int is guaranteed to hold at least 16 bits, this uses at most 3 bits.
				496	std::uniform_int_distribution<int> skewed_low(0,
				497	(1 << skewed_bits) - 1);
				498	c = static_cast<char>(skewed_low(rng));
				499	}
				500	while (run_len-- > 0 && x.size() < len) {
				501	x.push_back(c);
				502	}
				503	}
				504
				505	Verify(x);
				506	}
				507	}
				508
				509	TEST(Snappy, FourByteOffset) {
				510	// The new compressor cannot generate four-byte offsets since
				511	// it chops up the input into 32KB pieces. So we hand-emit the
				512	// copy manually.
				513
				514	// The two fragments that make up the input string.
				515	std::string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
				516	std::string fragment2 = "some other string";
				517
				518	// How many times each fragment is emitted.
				519	const int n1 = 2;
				520	const int n2 = 100000 / fragment2.size();
				521	const size_t length = n1 * fragment1.size() + n2 * fragment2.size();
				522
				523	std::string compressed;
				524	Varint::Append32(&compressed, length);
				525
				526	AppendLiteral(&compressed, fragment1);
				527	std::string src = fragment1;
				528	for (int i = 0; i < n2; ++i) {
				529	AppendLiteral(&compressed, fragment2);
				530	src += fragment2;
				531	}
				532	AppendCopy(&compressed, src.size(), fragment1.size());
				533	src += fragment1;
				534	CHECK_EQ(length, src.size());
				535
				536	std::string uncompressed;
				537	CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
				538	CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
				539	&uncompressed));
				540	CHECK_EQ(uncompressed, src);
				541	}
				542
				543	TEST(Snappy, IOVecEdgeCases) {
				544	// Test some tricky edge cases in the iovec output that are not necessarily
				545	// exercised by random tests.
				546
				547	// Our output blocks look like this initially (the last iovec is bigger
				548	// than depicted):
				549	// [ ] [ ] [ ] [ ] [ ]
				550	static const int kLengths[] = { 2, 1, 4, 8, 128 };
				551
				552	struct iovec iov[ARRAYSIZE(kLengths)];
				553	for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
				554	iov[i].iov_base = new char[kLengths[i]];
				555	iov[i].iov_len = kLengths[i];
				556	}
				557
				558	std::string compressed;
				559	Varint::Append32(&compressed, 22);
				560
				561	// A literal whose output crosses three blocks.
				562	// [ab] [c] [123 ] [ ] [ ]
				563	AppendLiteral(&compressed, "abc123");
				564
				565	// A copy whose output crosses two blocks (source and destination
				566	// segments marked).
				567	// [ab] [c] [1231] [23 ] [ ]
				568	// ^--^ --
				569	AppendCopy(&compressed, 3, 3);
				570
				571	// A copy where the input is, at first, in the block before the output:
				572	//
				573	// [ab] [c] [1231] [231231 ] [ ]
				574	// ^--- ^---
				575	// Then during the copy, the pointers move such that the input and
				576	// output pointers are in the same block:
				577	//
				578	// [ab] [c] [1231] [23123123] [ ]
				579	// ^- ^-
				580	// And then they move again, so that the output pointer is no longer
				581	// in the same block as the input pointer:
				582	// [ab] [c] [1231] [23123123] [123 ]
				583	// ^-- ^--
				584	AppendCopy(&compressed, 6, 9);
				585
				586	// Finally, a copy where the input is from several blocks back,
				587	// and it also crosses three blocks:
				588	//
				589	// [ab] [c] [1231] [23123123] [123b ]
				590	// ^ ^
				591	// [ab] [c] [1231] [23123123] [123bc ]
				592	// ^ ^
				593	// [ab] [c] [1231] [23123123] [123bc12 ]
				594	// ^- ^-
				595	AppendCopy(&compressed, 17, 4);
				596
				597	CHECK(snappy::RawUncompressToIOVec(
				598	compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
				599	CHECK_EQ(0, memcmp(iov[0].iov_base, "ab", 2));
				600	CHECK_EQ(0, memcmp(iov[1].iov_base, "c", 1));
				601	CHECK_EQ(0, memcmp(iov[2].iov_base, "1231", 4));
				602	CHECK_EQ(0, memcmp(iov[3].iov_base, "23123123", 8));
				603	CHECK_EQ(0, memcmp(iov[4].iov_base, "123bc12", 7));
				604
				605	for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
				606	delete[] reinterpret_cast<char *>(iov[i].iov_base);
				607	}
				608	}
				609
				610	TEST(Snappy, IOVecLiteralOverflow) {
				611	static const int kLengths[] = { 3, 4 };
				612
				613	struct iovec iov[ARRAYSIZE(kLengths)];
				614	for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
				615	iov[i].iov_base = new char[kLengths[i]];
				616	iov[i].iov_len = kLengths[i];
				617	}
				618
				619	std::string compressed;
				620	Varint::Append32(&compressed, 8);
				621
				622	AppendLiteral(&compressed, "12345678");
				623
				624	CHECK(!snappy::RawUncompressToIOVec(
				625	compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
				626
				627	for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
				628	delete[] reinterpret_cast<char *>(iov[i].iov_base);
				629	}
				630	}
				631
				632	TEST(Snappy, IOVecCopyOverflow) {
				633	static const int kLengths[] = { 3, 4 };
				634
				635	struct iovec iov[ARRAYSIZE(kLengths)];
				636	for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
				637	iov[i].iov_base = new char[kLengths[i]];
				638	iov[i].iov_len = kLengths[i];
				639	}
				640
				641	std::string compressed;
				642	Varint::Append32(&compressed, 8);
				643
				644	AppendLiteral(&compressed, "123");
				645	AppendCopy(&compressed, 3, 5);
				646
				647	CHECK(!snappy::RawUncompressToIOVec(
				648	compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
				649
				650	for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
				651	delete[] reinterpret_cast<char *>(iov[i].iov_base);
				652	}
				653	}
				654
				655	bool CheckUncompressedLength(const std::string& compressed, size_t* ulength) {
				656	const bool result1 = snappy::GetUncompressedLength(compressed.data(),
				657	compressed.size(),
				658	ulength);
				659
				660	snappy::ByteArraySource source(compressed.data(), compressed.size());
				661	uint32_t length;
				662	const bool result2 = snappy::GetUncompressedLength(&source, &length);
				663	CHECK_EQ(result1, result2);
				664	return result1;
				665	}
				666
				667	TEST(SnappyCorruption, TruncatedVarint) {
				668	std::string compressed, uncompressed;
				669	size_t ulength;
				670	compressed.push_back('\xf0');
				671	CHECK(!CheckUncompressedLength(compressed, &ulength));
				672	CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
				673	CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
				674	&uncompressed));
				675	}
				676
				677	TEST(SnappyCorruption, UnterminatedVarint) {
				678	std::string compressed, uncompressed;
				679	size_t ulength;
				680	compressed.push_back('\x80');
				681	compressed.push_back('\x80');
				682	compressed.push_back('\x80');
				683	compressed.push_back('\x80');
				684	compressed.push_back('\x80');
				685	compressed.push_back(10);
				686	CHECK(!CheckUncompressedLength(compressed, &ulength));
				687	CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
				688	CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
				689	&uncompressed));
				690	}
				691
				692	TEST(SnappyCorruption, OverflowingVarint) {
				693	std::string compressed, uncompressed;
				694	size_t ulength;
				695	compressed.push_back('\xfb');
				696	compressed.push_back('\xff');
				697	compressed.push_back('\xff');
				698	compressed.push_back('\xff');
				699	compressed.push_back('\x7f');
				700	CHECK(!CheckUncompressedLength(compressed, &ulength));
				701	CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
				702	CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
				703	&uncompressed));
				704	}
				705
				706	TEST(Snappy, ReadPastEndOfBuffer) {
				707	// Check that we do not read past end of input
				708
				709	// Make a compressed string that ends with a single-byte literal
				710	std::string compressed;
				711	Varint::Append32(&compressed, 1);
				712	AppendLiteral(&compressed, "x");
				713
				714	std::string uncompressed;
				715	DataEndingAtUnreadablePage c(compressed);
				716	CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
				717	CHECK_EQ(uncompressed, std::string("x"));
				718	}
				719
				720	// Check for an infinite loop caused by a copy with offset==0
				721	TEST(Snappy, ZeroOffsetCopy) {
				722	const char* compressed = "\x40\x12\x00\x00";
				723	// \x40 Length (must be > kMaxIncrementCopyOverflow)
				724	// \x12\x00\x00 Copy with offset==0, length==5
				725	char uncompressed[100];
				726	EXPECT_FALSE(snappy::RawUncompress(compressed, 4, uncompressed));
				727	}
				728
				729	TEST(Snappy, ZeroOffsetCopyValidation) {
				730	const char* compressed = "\x05\x12\x00\x00";
				731	// \x05 Length
				732	// \x12\x00\x00 Copy with offset==0, length==5
				733	EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
				734	}
				735
				736	int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
				737	uint64_t data;
				738	std::pair<size_t, bool> p =
				739	snappy::internal::FindMatchLength(s1, s2, s2 + length, &data);
				740	CHECK_EQ(p.first < 8, p.second);
				741	return p.first;
				742	}
				743
				744	TEST(Snappy, FindMatchLength) {
				745	// Exercise all different code paths through the function.
				746	// 64-bit version:
				747
				748	// Hit s1_limit in 64-bit loop, hit s1_limit in single-character loop.
				749	EXPECT_EQ(6, TestFindMatchLength("012345", "012345", 6));
				750	EXPECT_EQ(11, TestFindMatchLength("01234567abc", "01234567abc", 11));
				751
				752	// Hit s1_limit in 64-bit loop, find a non-match in single-character loop.
				753	EXPECT_EQ(9, TestFindMatchLength("01234567abc", "01234567axc", 9));
				754
				755	// Same, but edge cases.
				756	EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc!", 11));
				757	EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc?", 11));
				758
				759	// Find non-match at once in first loop.
				760	EXPECT_EQ(0, TestFindMatchLength("01234567xxxxxxxx", "?1234567xxxxxxxx", 16));
				761	EXPECT_EQ(1, TestFindMatchLength("01234567xxxxxxxx", "0?234567xxxxxxxx", 16));
				762	EXPECT_EQ(4, TestFindMatchLength("01234567xxxxxxxx", "01237654xxxxxxxx", 16));
				763	EXPECT_EQ(7, TestFindMatchLength("01234567xxxxxxxx", "0123456?xxxxxxxx", 16));
				764
				765	// Find non-match in first loop after one block.
				766	EXPECT_EQ(8, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
				767	"abcdefgh?1234567xxxxxxxx", 24));
				768	EXPECT_EQ(9, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
				769	"abcdefgh0?234567xxxxxxxx", 24));
				770	EXPECT_EQ(12, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
				771	"abcdefgh01237654xxxxxxxx", 24));
				772	EXPECT_EQ(15, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
				773	"abcdefgh0123456?xxxxxxxx", 24));
				774
				775	// 32-bit version:
				776
				777	// Short matches.
				778	EXPECT_EQ(0, TestFindMatchLength("01234567", "?1234567", 8));
				779	EXPECT_EQ(1, TestFindMatchLength("01234567", "0?234567", 8));
				780	EXPECT_EQ(2, TestFindMatchLength("01234567", "01?34567", 8));
				781	EXPECT_EQ(3, TestFindMatchLength("01234567", "012?4567", 8));
				782	EXPECT_EQ(4, TestFindMatchLength("01234567", "0123?567", 8));
				783	EXPECT_EQ(5, TestFindMatchLength("01234567", "01234?67", 8));
				784	EXPECT_EQ(6, TestFindMatchLength("01234567", "012345?7", 8));
				785	EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 8));
				786	EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 7));
				787	EXPECT_EQ(7, TestFindMatchLength("01234567!", "0123456??", 7));
				788
				789	// Hit s1_limit in 32-bit loop, hit s1_limit in single-character loop.
				790	EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd", "xxxxxxabcd", 10));
				791	EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd?", "xxxxxxabcd?", 10));
				792	EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcdef", "xxxxxxabcdef", 13));
				793
				794	// Same, but edge cases.
				795	EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc!", 12));
				796	EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc?", 12));
				797
				798	// Hit s1_limit in 32-bit loop, find a non-match in single-character loop.
				799	EXPECT_EQ(11, TestFindMatchLength("xxxxxx0123abc", "xxxxxx0123axc", 13));
				800
				801	// Find non-match at once in first loop.
				802	EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123xxxxxxxx",
				803	"xxxxxx?123xxxxxxxx", 18));
				804	EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123xxxxxxxx",
				805	"xxxxxx0?23xxxxxxxx", 18));
				806	EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123xxxxxxxx",
				807	"xxxxxx0132xxxxxxxx", 18));
				808	EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123xxxxxxxx",
				809	"xxxxxx012?xxxxxxxx", 18));
				810
				811	// Same, but edge cases.
				812	EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123", "xxxxxx?123", 10));
				813	EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123", "xxxxxx0?23", 10));
				814	EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123", "xxxxxx0132", 10));
				815	EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123", "xxxxxx012?", 10));
				816
				817	// Find non-match in first loop after one block.
				818	EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123xx",
				819	"xxxxxxabcd?123xx", 16));
				820	EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123xx",
				821	"xxxxxxabcd0?23xx", 16));
				822	EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123xx",
				823	"xxxxxxabcd0132xx", 16));
				824	EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123xx",
				825	"xxxxxxabcd012?xx", 16));
				826
				827	// Same, but edge cases.
				828	EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd?123", 14));
				829	EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0?23", 14));
				830	EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0132", 14));
				831	EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd012?", 14));
				832	}
				833
				834	TEST(Snappy, FindMatchLengthRandom) {
				835	constexpr int kNumTrials = 10000;
				836	constexpr int kTypicalLength = 10;
				837	std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
				838	std::uniform_int_distribution<int> uniform_byte(0, 255);
				839	std::bernoulli_distribution one_in_two(1.0 / 2);
				840	std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);
				841
				842	for (int i = 0; i < kNumTrials; ++i) {
				843	std::string s, t;
				844	char a = static_cast<char>(uniform_byte(rng));
				845	char b = static_cast<char>(uniform_byte(rng));
				846	while (!one_in_typical_length(rng)) {
				847	s.push_back(one_in_two(rng) ? a : b);
				848	t.push_back(one_in_two(rng) ? a : b);
				849	}
				850	DataEndingAtUnreadablePage u(s);
				851	DataEndingAtUnreadablePage v(t);
				852	size_t matched = TestFindMatchLength(u.data(), v.data(), t.size());
				853	if (matched == t.size()) {
				854	EXPECT_EQ(s, t);
				855	} else {
				856	EXPECT_NE(s[matched], t[matched]);
				857	for (size_t j = 0; j < matched; ++j) {
				858	EXPECT_EQ(s[j], t[j]);
				859	}
				860	}
				861	}
				862	}
				863
				864	uint16_t MakeEntry(unsigned int extra, unsigned int len,
				865	unsigned int copy_offset) {
				866	// Check that all of the fields fit within the allocated space
				867	assert(extra == (extra & 0x7)); // At most 3 bits
				868	assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
				869	assert(len == (len & 0x7f)); // At most 7 bits
				870	return len \| (copy_offset << 8) \| (extra << 11);
				871	}
				872
				873	// Check that the decompression table is correct, and optionally print out
				874	// the computed one.
				875	TEST(Snappy, VerifyCharTable) {
				876	using snappy::internal::LITERAL;
				877	using snappy::internal::COPY_1_BYTE_OFFSET;
				878	using snappy::internal::COPY_2_BYTE_OFFSET;
				879	using snappy::internal::COPY_4_BYTE_OFFSET;
				880	using snappy::internal::char_table;
				881
				882	uint16_t dst[256];
				883
				884	// Place invalid entries in all places to detect missing initialization
				885	int assigned = 0;
				886	for (int i = 0; i < 256; ++i) {
				887	dst[i] = 0xffff;
				888	}
				889
				890	// Small LITERAL entries. We store (len-1) in the top 6 bits.
				891	for (uint8_t len = 1; len <= 60; ++len) {
				892	dst[LITERAL \| ((len - 1) << 2)] = MakeEntry(0, len, 0);
				893	assigned++;
				894	}
				895
				896	// Large LITERAL entries. We use 60..63 in the high 6 bits to
				897	// encode the number of bytes of length info that follow the opcode.
				898	for (uint8_t extra_bytes = 1; extra_bytes <= 4; ++extra_bytes) {
				899	// We set the length field in the lookup table to 1 because extra
				900	// bytes encode len-1.
				901	dst[LITERAL \| ((extra_bytes + 59) << 2)] = MakeEntry(extra_bytes, 1, 0);
				902	assigned++;
				903	}
				904
				905	// COPY_1_BYTE_OFFSET.
				906	//
				907	// The tag byte in the compressed data stores len-4 in 3 bits, and
				908	// offset/256 in 3 bits. offset%256 is stored in the next byte.
				909	//
				910	// This format is used for length in range [4..11] and offset in
				911	// range [0..2047]
				912	for (uint8_t len = 4; len < 12; ++len) {
				913	for (uint16_t offset = 0; offset < 2048; offset += 256) {
				914	uint8_t offset_high = static_cast<uint8_t>(offset >> 8);
				915	dst[COPY_1_BYTE_OFFSET \| ((len - 4) << 2) \| (offset_high << 5)] =
				916	MakeEntry(1, len, offset_high);
				917	assigned++;
				918	}
				919	}
				920
				921	// COPY_2_BYTE_OFFSET.
				922	// Tag contains len-1 in top 6 bits, and offset in next two bytes.
				923	for (uint8_t len = 1; len <= 64; ++len) {
				924	dst[COPY_2_BYTE_OFFSET \| ((len - 1) << 2)] = MakeEntry(2, len, 0);
				925	assigned++;
				926	}
				927
				928	// COPY_4_BYTE_OFFSET.
				929	// Tag contents len-1 in top 6 bits, and offset in next four bytes.
				930	for (uint8_t len = 1; len <= 64; ++len) {
				931	dst[COPY_4_BYTE_OFFSET \| ((len - 1) << 2)] = MakeEntry(4, len, 0);
				932	assigned++;
				933	}
				934
				935	// Check that each entry was initialized exactly once.
				936	EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
				937	for (int i = 0; i < 256; ++i) {
				938	EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
				939	}
				940
				941	if (snappy::GetFlag(FLAGS_snappy_dump_decompression_table)) {
				942	std::printf("static const uint16_t char_table[256] = {\n ");
				943	for (int i = 0; i < 256; ++i) {
				944	std::printf("0x%04x%s",
				945	dst[i],
				946	((i == 255) ? "\n" : (((i % 8) == 7) ? ",\n " : ", ")));
				947	}
				948	std::printf("};\n");
				949	}
				950
				951	// Check that computed table matched recorded table.
				952	for (int i = 0; i < 256; ++i) {
				953	EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
				954	}
				955	}
				956
				957	TEST(Snappy, TestBenchmarkFiles) {
				958	for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) {
				959	Verify(ReadTestDataFile(kTestDataFiles[i].filename,
				960	kTestDataFiles[i].size_limit));
				961	}
				962	}
				963
				964	} // namespace
				965
				966	} // namespace snappy