blob: 292004ca84443d4117e9e690da352e80dfd7499f [file] [log] [blame]
James Kuszmaul48dd4c82021-10-27 20:04:08 -07001// Copyright 2005 and onwards Google Inc.
2//
3// Redistribution and use in source and binary forms, with or without
4// modification, are permitted provided that the following conditions are
5// met:
6//
7// * Redistributions of source code must retain the above copyright
8// notice, this list of conditions and the following disclaimer.
9// * Redistributions in binary form must reproduce the above
10// copyright notice, this list of conditions and the following disclaimer
11// in the documentation and/or other materials provided with the
12// distribution.
13// * Neither the name of Google Inc. nor the names of its
14// contributors may be used to endorse or promote products derived from
15// this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29#include <algorithm>
30#include <cmath>
31#include <cstdlib>
32#include <random>
33#include <string>
34#include <utility>
35#include <vector>
36
37#include "snappy-test.h"
38
39#include "gtest/gtest.h"
40
41#include "snappy-internal.h"
42#include "snappy-sinksource.h"
43#include "snappy.h"
44#include "snappy_test_data.h"
45
46SNAPPY_FLAG(bool, snappy_dump_decompression_table, false,
47 "If true, we print the decompression table during tests.");
48
49namespace snappy {
50
51namespace {
52
53#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
54
55// To test against code that reads beyond its input, this class copies a
56// string to a newly allocated group of pages, the last of which
57// is made unreadable via mprotect. Note that we need to allocate the
58// memory with mmap(), as POSIX allows mprotect() only on memory allocated
59// with mmap(), and some malloc/posix_memalign implementations expect to
60// be able to read previously allocated memory while doing heap allocations.
61class DataEndingAtUnreadablePage {
62 public:
63 explicit DataEndingAtUnreadablePage(const std::string& s) {
64 const size_t page_size = sysconf(_SC_PAGESIZE);
65 const size_t size = s.size();
66 // Round up space for string to a multiple of page_size.
67 size_t space_for_string = (size + page_size - 1) & ~(page_size - 1);
68 alloc_size_ = space_for_string + page_size;
69 mem_ = mmap(NULL, alloc_size_,
70 PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
71 CHECK_NE(MAP_FAILED, mem_);
72 protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string;
73 char* dst = protected_page_ - size;
74 std::memcpy(dst, s.data(), size);
75 data_ = dst;
76 size_ = size;
77 // Make guard page unreadable.
78 CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE));
79 }
80
81 ~DataEndingAtUnreadablePage() {
82 const size_t page_size = sysconf(_SC_PAGESIZE);
83 // Undo the mprotect.
84 CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE));
85 CHECK_EQ(0, munmap(mem_, alloc_size_));
86 }
87
88 const char* data() const { return data_; }
89 size_t size() const { return size_; }
90
91 private:
92 size_t alloc_size_;
93 void* mem_;
94 char* protected_page_;
95 const char* data_;
96 size_t size_;
97};
98
99#else // HAVE_FUNC_MMAP) && HAVE_FUNC_SYSCONF
100
101// Fallback for systems without mmap.
102using DataEndingAtUnreadablePage = std::string;
103
104#endif
105
106int VerifyString(const std::string& input) {
107 std::string compressed;
108 DataEndingAtUnreadablePage i(input);
109 const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
110 CHECK_EQ(written, compressed.size());
111 CHECK_LE(compressed.size(),
112 snappy::MaxCompressedLength(input.size()));
113 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
114
115 std::string uncompressed;
116 DataEndingAtUnreadablePage c(compressed);
117 CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
118 CHECK_EQ(uncompressed, input);
119 return uncompressed.size();
120}
121
122void VerifyStringSink(const std::string& input) {
123 std::string compressed;
124 DataEndingAtUnreadablePage i(input);
125 const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
126 CHECK_EQ(written, compressed.size());
127 CHECK_LE(compressed.size(),
128 snappy::MaxCompressedLength(input.size()));
129 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
130
131 std::string uncompressed;
132 uncompressed.resize(input.size());
133 snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed));
134 DataEndingAtUnreadablePage c(compressed);
135 snappy::ByteArraySource source(c.data(), c.size());
136 CHECK(snappy::Uncompress(&source, &sink));
137 CHECK_EQ(uncompressed, input);
138}
139
140void VerifyIOVec(const std::string& input) {
141 std::string compressed;
142 DataEndingAtUnreadablePage i(input);
143 const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
144 CHECK_EQ(written, compressed.size());
145 CHECK_LE(compressed.size(),
146 snappy::MaxCompressedLength(input.size()));
147 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
148
149 // Try uncompressing into an iovec containing a random number of entries
150 // ranging from 1 to 10.
151 char* buf = new char[input.size()];
152 std::minstd_rand0 rng(input.size());
153 std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
154 size_t num = uniform_1_to_10(rng);
155 if (input.size() < num) {
156 num = input.size();
157 }
158 struct iovec* iov = new iovec[num];
159 size_t used_so_far = 0;
160 std::bernoulli_distribution one_in_five(1.0 / 5);
161 for (size_t i = 0; i < num; ++i) {
162 assert(used_so_far < input.size());
163 iov[i].iov_base = buf + used_so_far;
164 if (i == num - 1) {
165 iov[i].iov_len = input.size() - used_so_far;
166 } else {
167 // Randomly choose to insert a 0 byte entry.
168 if (one_in_five(rng)) {
169 iov[i].iov_len = 0;
170 } else {
171 std::uniform_int_distribution<size_t> uniform_not_used_so_far(
172 0, input.size() - used_so_far - 1);
173 iov[i].iov_len = uniform_not_used_so_far(rng);
174 }
175 }
176 used_so_far += iov[i].iov_len;
177 }
178 CHECK(snappy::RawUncompressToIOVec(
179 compressed.data(), compressed.size(), iov, num));
180 CHECK(!memcmp(buf, input.data(), input.size()));
181 delete[] iov;
182 delete[] buf;
183}
184
185// Test that data compressed by a compressor that does not
186// obey block sizes is uncompressed properly.
187void VerifyNonBlockedCompression(const std::string& input) {
188 if (input.length() > snappy::kBlockSize) {
189 // We cannot test larger blocks than the maximum block size, obviously.
190 return;
191 }
192
193 std::string prefix;
194 Varint::Append32(&prefix, input.size());
195
196 // Setup compression table
197 snappy::internal::WorkingMemory wmem(input.size());
198 int table_size;
199 uint16_t* table = wmem.GetHashTable(input.size(), &table_size);
200
201 // Compress entire input in one shot
202 std::string compressed;
203 compressed += prefix;
204 compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size()));
205 char* dest = string_as_array(&compressed) + prefix.size();
206 char* end = snappy::internal::CompressFragment(input.data(), input.size(),
207 dest, table, table_size);
208 compressed.resize(end - compressed.data());
209
210 // Uncompress into std::string
211 std::string uncomp_str;
212 CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str));
213 CHECK_EQ(uncomp_str, input);
214
215 // Uncompress using source/sink
216 std::string uncomp_str2;
217 uncomp_str2.resize(input.size());
218 snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2));
219 snappy::ByteArraySource source(compressed.data(), compressed.size());
220 CHECK(snappy::Uncompress(&source, &sink));
221 CHECK_EQ(uncomp_str2, input);
222
223 // Uncompress into iovec
224 {
225 static const int kNumBlocks = 10;
226 struct iovec vec[kNumBlocks];
227 const int block_size = 1 + input.size() / kNumBlocks;
228 std::string iovec_data(block_size * kNumBlocks, 'x');
229 for (int i = 0; i < kNumBlocks; ++i) {
230 vec[i].iov_base = string_as_array(&iovec_data) + i * block_size;
231 vec[i].iov_len = block_size;
232 }
233 CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(),
234 vec, kNumBlocks));
235 CHECK_EQ(std::string(iovec_data.data(), input.size()), input);
236 }
237}
238
239// Expand the input so that it is at least K times as big as block size
240std::string Expand(const std::string& input) {
241 static const int K = 3;
242 std::string data = input;
243 while (data.size() < K * snappy::kBlockSize) {
244 data += input;
245 }
246 return data;
247}
248
249int Verify(const std::string& input) {
250 VLOG(1) << "Verifying input of size " << input.size();
251
252 // Compress using string based routines
253 const int result = VerifyString(input);
254
255 // Verify using sink based routines
256 VerifyStringSink(input);
257
258 VerifyNonBlockedCompression(input);
259 VerifyIOVec(input);
260 if (!input.empty()) {
261 const std::string expanded = Expand(input);
262 VerifyNonBlockedCompression(expanded);
263 VerifyIOVec(input);
264 }
265
266 return result;
267}
268
269bool IsValidCompressedBuffer(const std::string& c) {
270 return snappy::IsValidCompressedBuffer(c.data(), c.size());
271}
272bool Uncompress(const std::string& c, std::string* u) {
273 return snappy::Uncompress(c.data(), c.size(), u);
274}
275
276// This test checks to ensure that snappy doesn't coredump if it gets
277// corrupted data.
278TEST(CorruptedTest, VerifyCorrupted) {
279 std::string source = "making sure we don't crash with corrupted input";
280 VLOG(1) << source;
281 std::string dest;
282 std::string uncmp;
283 snappy::Compress(source.data(), source.size(), &dest);
284
285 // Mess around with the data. It's hard to simulate all possible
286 // corruptions; this is just one example ...
287 CHECK_GT(dest.size(), 3);
288 dest[1]--;
289 dest[3]++;
290 // this really ought to fail.
291 CHECK(!IsValidCompressedBuffer(dest));
292 CHECK(!Uncompress(dest, &uncmp));
293
294 // This is testing for a security bug - a buffer that decompresses to 100k
295 // but we lie in the snappy header and only reserve 0 bytes of memory :)
296 source.resize(100000);
297 for (char& source_char : source) {
298 source_char = 'A';
299 }
300 snappy::Compress(source.data(), source.size(), &dest);
301 dest[0] = dest[1] = dest[2] = dest[3] = 0;
302 CHECK(!IsValidCompressedBuffer(dest));
303 CHECK(!Uncompress(dest, &uncmp));
304
305 if (sizeof(void *) == 4) {
306 // Another security check; check a crazy big length can't DoS us with an
307 // over-allocation.
308 // Currently this is done only for 32-bit builds. On 64-bit builds,
309 // where 3 GB might be an acceptable allocation size, Uncompress()
310 // attempts to decompress, and sometimes causes the test to run out of
311 // memory.
312 dest[0] = dest[1] = dest[2] = dest[3] = '\xff';
313 // This decodes to a really large size, i.e., about 3 GB.
314 dest[4] = 'k';
315 CHECK(!IsValidCompressedBuffer(dest));
316 CHECK(!Uncompress(dest, &uncmp));
317 } else {
318 LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build";
319 }
320
321 // This decodes to about 2 MB; much smaller, but should still fail.
322 dest[0] = dest[1] = dest[2] = '\xff';
323 dest[3] = 0x00;
324 CHECK(!IsValidCompressedBuffer(dest));
325 CHECK(!Uncompress(dest, &uncmp));
326
327 // try reading stuff in from a bad file.
328 for (int i = 1; i <= 3; ++i) {
329 std::string data =
330 ReadTestDataFile(StrFormat("baddata%d.snappy", i).c_str(), 0);
331 std::string uncmp;
332 // check that we don't return a crazy length
333 size_t ulen;
334 CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen)
335 || (ulen < (1<<20)));
336 uint32_t ulen2;
337 snappy::ByteArraySource source(data.data(), data.size());
338 CHECK(!snappy::GetUncompressedLength(&source, &ulen2) ||
339 (ulen2 < (1<<20)));
340 CHECK(!IsValidCompressedBuffer(data));
341 CHECK(!Uncompress(data, &uncmp));
342 }
343}
344
345// Helper routines to construct arbitrary compressed strings.
346// These mirror the compression code in snappy.cc, but are copied
347// here so that we can bypass some limitations in the how snappy.cc
348// invokes these routines.
349void AppendLiteral(std::string* dst, const std::string& literal) {
350 if (literal.empty()) return;
351 int n = literal.size() - 1;
352 if (n < 60) {
353 // Fit length in tag byte
354 dst->push_back(0 | (n << 2));
355 } else {
356 // Encode in upcoming bytes
357 char number[4];
358 int count = 0;
359 while (n > 0) {
360 number[count++] = n & 0xff;
361 n >>= 8;
362 }
363 dst->push_back(0 | ((59+count) << 2));
364 *dst += std::string(number, count);
365 }
366 *dst += literal;
367}
368
369void AppendCopy(std::string* dst, int offset, int length) {
370 while (length > 0) {
371 // Figure out how much to copy in one shot
372 int to_copy;
373 if (length >= 68) {
374 to_copy = 64;
375 } else if (length > 64) {
376 to_copy = 60;
377 } else {
378 to_copy = length;
379 }
380 length -= to_copy;
381
382 if ((to_copy >= 4) && (to_copy < 12) && (offset < 2048)) {
383 assert(to_copy-4 < 8); // Must fit in 3 bits
384 dst->push_back(1 | ((to_copy-4) << 2) | ((offset >> 8) << 5));
385 dst->push_back(offset & 0xff);
386 } else if (offset < 65536) {
387 dst->push_back(2 | ((to_copy-1) << 2));
388 dst->push_back(offset & 0xff);
389 dst->push_back(offset >> 8);
390 } else {
391 dst->push_back(3 | ((to_copy-1) << 2));
392 dst->push_back(offset & 0xff);
393 dst->push_back((offset >> 8) & 0xff);
394 dst->push_back((offset >> 16) & 0xff);
395 dst->push_back((offset >> 24) & 0xff);
396 }
397 }
398}
399
400TEST(Snappy, SimpleTests) {
401 Verify("");
402 Verify("a");
403 Verify("ab");
404 Verify("abc");
405
406 Verify("aaaaaaa" + std::string(16, 'b') + std::string("aaaaa") + "abc");
407 Verify("aaaaaaa" + std::string(256, 'b') + std::string("aaaaa") + "abc");
408 Verify("aaaaaaa" + std::string(2047, 'b') + std::string("aaaaa") + "abc");
409 Verify("aaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
410 Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc");
411}
412
413// Regression test for cr/345340892.
414TEST(Snappy, AppendSelfPatternExtensionEdgeCases) {
415 Verify("abcabcabcabcabcabcab");
416 Verify("abcabcabcabcabcabcab0123456789ABCDEF");
417
418 Verify("abcabcabcabcabcabcabcabcabcabcabcabc");
419 Verify("abcabcabcabcabcabcabcabcabcabcabcabc0123456789ABCDEF");
420}
421
422// Regression test for cr/345340892.
423TEST(Snappy, AppendSelfPatternExtensionEdgeCasesExhaustive) {
424 std::mt19937 rng;
425 std::uniform_int_distribution<int> uniform_byte(0, 255);
426 for (int pattern_size = 1; pattern_size <= 18; ++pattern_size) {
427 for (int length = 1; length <= 64; ++length) {
428 for (int extra_bytes_after_pattern : {0, 1, 15, 16, 128}) {
429 const int size = pattern_size + length + extra_bytes_after_pattern;
430 std::string input;
431 input.resize(size);
432 for (int i = 0; i < pattern_size; ++i) {
433 input[i] = 'a' + i;
434 }
435 for (int i = 0; i < length; ++i) {
436 input[pattern_size + i] = input[i];
437 }
438 for (int i = 0; i < extra_bytes_after_pattern; ++i) {
439 input[pattern_size + length + i] =
440 static_cast<char>(uniform_byte(rng));
441 }
442 Verify(input);
443 }
444 }
445 }
446}
447
448// Verify max blowup (lots of four-byte copies)
449TEST(Snappy, MaxBlowup) {
450 std::mt19937 rng;
451 std::uniform_int_distribution<int> uniform_byte(0, 255);
452 std::string input;
453 for (int i = 0; i < 80000; ++i)
454 input.push_back(static_cast<char>(uniform_byte(rng)));
455
456 for (int i = 0; i < 80000; i += 4) {
457 std::string four_bytes(input.end() - i - 4, input.end() - i);
458 input.append(four_bytes);
459 }
460 Verify(input);
461}
462
463TEST(Snappy, RandomData) {
464 std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
465 std::uniform_int_distribution<int> uniform_0_to_3(0, 3);
466 std::uniform_int_distribution<int> uniform_0_to_8(0, 8);
467 std::uniform_int_distribution<int> uniform_byte(0, 255);
468 std::uniform_int_distribution<size_t> uniform_4k(0, 4095);
469 std::uniform_int_distribution<size_t> uniform_64k(0, 65535);
470 std::bernoulli_distribution one_in_ten(1.0 / 10);
471
472 constexpr int num_ops = 20000;
473 for (int i = 0; i < num_ops; ++i) {
474 if ((i % 1000) == 0) {
475 VLOG(0) << "Random op " << i << " of " << num_ops;
476 }
477
478 std::string x;
479 size_t len = uniform_4k(rng);
480 if (i < 100) {
481 len = 65536 + uniform_64k(rng);
482 }
483 while (x.size() < len) {
484 int run_len = 1;
485 if (one_in_ten(rng)) {
486 int skewed_bits = uniform_0_to_8(rng);
487 // int is guaranteed to hold at least 16 bits, this uses at most 8 bits.
488 std::uniform_int_distribution<int> skewed_low(0,
489 (1 << skewed_bits) - 1);
490 run_len = skewed_low(rng);
491 }
492 char c = static_cast<char>(uniform_byte(rng));
493 if (i >= 100) {
494 int skewed_bits = uniform_0_to_3(rng);
495 // int is guaranteed to hold at least 16 bits, this uses at most 3 bits.
496 std::uniform_int_distribution<int> skewed_low(0,
497 (1 << skewed_bits) - 1);
498 c = static_cast<char>(skewed_low(rng));
499 }
500 while (run_len-- > 0 && x.size() < len) {
501 x.push_back(c);
502 }
503 }
504
505 Verify(x);
506 }
507}
508
509TEST(Snappy, FourByteOffset) {
510 // The new compressor cannot generate four-byte offsets since
511 // it chops up the input into 32KB pieces. So we hand-emit the
512 // copy manually.
513
514 // The two fragments that make up the input string.
515 std::string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz";
516 std::string fragment2 = "some other string";
517
518 // How many times each fragment is emitted.
519 const int n1 = 2;
520 const int n2 = 100000 / fragment2.size();
521 const size_t length = n1 * fragment1.size() + n2 * fragment2.size();
522
523 std::string compressed;
524 Varint::Append32(&compressed, length);
525
526 AppendLiteral(&compressed, fragment1);
527 std::string src = fragment1;
528 for (int i = 0; i < n2; ++i) {
529 AppendLiteral(&compressed, fragment2);
530 src += fragment2;
531 }
532 AppendCopy(&compressed, src.size(), fragment1.size());
533 src += fragment1;
534 CHECK_EQ(length, src.size());
535
536 std::string uncompressed;
537 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
538 CHECK(snappy::Uncompress(compressed.data(), compressed.size(),
539 &uncompressed));
540 CHECK_EQ(uncompressed, src);
541}
542
543TEST(Snappy, IOVecEdgeCases) {
544 // Test some tricky edge cases in the iovec output that are not necessarily
545 // exercised by random tests.
546
547 // Our output blocks look like this initially (the last iovec is bigger
548 // than depicted):
549 // [ ] [ ] [ ] [ ] [ ]
550 static const int kLengths[] = { 2, 1, 4, 8, 128 };
551
552 struct iovec iov[ARRAYSIZE(kLengths)];
553 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
554 iov[i].iov_base = new char[kLengths[i]];
555 iov[i].iov_len = kLengths[i];
556 }
557
558 std::string compressed;
559 Varint::Append32(&compressed, 22);
560
561 // A literal whose output crosses three blocks.
562 // [ab] [c] [123 ] [ ] [ ]
563 AppendLiteral(&compressed, "abc123");
564
565 // A copy whose output crosses two blocks (source and destination
566 // segments marked).
567 // [ab] [c] [1231] [23 ] [ ]
568 // ^--^ --
569 AppendCopy(&compressed, 3, 3);
570
571 // A copy where the input is, at first, in the block before the output:
572 //
573 // [ab] [c] [1231] [231231 ] [ ]
574 // ^--- ^---
575 // Then during the copy, the pointers move such that the input and
576 // output pointers are in the same block:
577 //
578 // [ab] [c] [1231] [23123123] [ ]
579 // ^- ^-
580 // And then they move again, so that the output pointer is no longer
581 // in the same block as the input pointer:
582 // [ab] [c] [1231] [23123123] [123 ]
583 // ^-- ^--
584 AppendCopy(&compressed, 6, 9);
585
586 // Finally, a copy where the input is from several blocks back,
587 // and it also crosses three blocks:
588 //
589 // [ab] [c] [1231] [23123123] [123b ]
590 // ^ ^
591 // [ab] [c] [1231] [23123123] [123bc ]
592 // ^ ^
593 // [ab] [c] [1231] [23123123] [123bc12 ]
594 // ^- ^-
595 AppendCopy(&compressed, 17, 4);
596
597 CHECK(snappy::RawUncompressToIOVec(
598 compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
599 CHECK_EQ(0, memcmp(iov[0].iov_base, "ab", 2));
600 CHECK_EQ(0, memcmp(iov[1].iov_base, "c", 1));
601 CHECK_EQ(0, memcmp(iov[2].iov_base, "1231", 4));
602 CHECK_EQ(0, memcmp(iov[3].iov_base, "23123123", 8));
603 CHECK_EQ(0, memcmp(iov[4].iov_base, "123bc12", 7));
604
605 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
606 delete[] reinterpret_cast<char *>(iov[i].iov_base);
607 }
608}
609
610TEST(Snappy, IOVecLiteralOverflow) {
611 static const int kLengths[] = { 3, 4 };
612
613 struct iovec iov[ARRAYSIZE(kLengths)];
614 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
615 iov[i].iov_base = new char[kLengths[i]];
616 iov[i].iov_len = kLengths[i];
617 }
618
619 std::string compressed;
620 Varint::Append32(&compressed, 8);
621
622 AppendLiteral(&compressed, "12345678");
623
624 CHECK(!snappy::RawUncompressToIOVec(
625 compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
626
627 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
628 delete[] reinterpret_cast<char *>(iov[i].iov_base);
629 }
630}
631
632TEST(Snappy, IOVecCopyOverflow) {
633 static const int kLengths[] = { 3, 4 };
634
635 struct iovec iov[ARRAYSIZE(kLengths)];
636 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
637 iov[i].iov_base = new char[kLengths[i]];
638 iov[i].iov_len = kLengths[i];
639 }
640
641 std::string compressed;
642 Varint::Append32(&compressed, 8);
643
644 AppendLiteral(&compressed, "123");
645 AppendCopy(&compressed, 3, 5);
646
647 CHECK(!snappy::RawUncompressToIOVec(
648 compressed.data(), compressed.size(), iov, ARRAYSIZE(iov)));
649
650 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
651 delete[] reinterpret_cast<char *>(iov[i].iov_base);
652 }
653}
654
655bool CheckUncompressedLength(const std::string& compressed, size_t* ulength) {
656 const bool result1 = snappy::GetUncompressedLength(compressed.data(),
657 compressed.size(),
658 ulength);
659
660 snappy::ByteArraySource source(compressed.data(), compressed.size());
661 uint32_t length;
662 const bool result2 = snappy::GetUncompressedLength(&source, &length);
663 CHECK_EQ(result1, result2);
664 return result1;
665}
666
667TEST(SnappyCorruption, TruncatedVarint) {
668 std::string compressed, uncompressed;
669 size_t ulength;
670 compressed.push_back('\xf0');
671 CHECK(!CheckUncompressedLength(compressed, &ulength));
672 CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
673 CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
674 &uncompressed));
675}
676
677TEST(SnappyCorruption, UnterminatedVarint) {
678 std::string compressed, uncompressed;
679 size_t ulength;
680 compressed.push_back('\x80');
681 compressed.push_back('\x80');
682 compressed.push_back('\x80');
683 compressed.push_back('\x80');
684 compressed.push_back('\x80');
685 compressed.push_back(10);
686 CHECK(!CheckUncompressedLength(compressed, &ulength));
687 CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
688 CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
689 &uncompressed));
690}
691
692TEST(SnappyCorruption, OverflowingVarint) {
693 std::string compressed, uncompressed;
694 size_t ulength;
695 compressed.push_back('\xfb');
696 compressed.push_back('\xff');
697 compressed.push_back('\xff');
698 compressed.push_back('\xff');
699 compressed.push_back('\x7f');
700 CHECK(!CheckUncompressedLength(compressed, &ulength));
701 CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
702 CHECK(!snappy::Uncompress(compressed.data(), compressed.size(),
703 &uncompressed));
704}
705
706TEST(Snappy, ReadPastEndOfBuffer) {
707 // Check that we do not read past end of input
708
709 // Make a compressed string that ends with a single-byte literal
710 std::string compressed;
711 Varint::Append32(&compressed, 1);
712 AppendLiteral(&compressed, "x");
713
714 std::string uncompressed;
715 DataEndingAtUnreadablePage c(compressed);
716 CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
717 CHECK_EQ(uncompressed, std::string("x"));
718}
719
720// Check for an infinite loop caused by a copy with offset==0
721TEST(Snappy, ZeroOffsetCopy) {
722 const char* compressed = "\x40\x12\x00\x00";
723 // \x40 Length (must be > kMaxIncrementCopyOverflow)
724 // \x12\x00\x00 Copy with offset==0, length==5
725 char uncompressed[100];
726 EXPECT_FALSE(snappy::RawUncompress(compressed, 4, uncompressed));
727}
728
729TEST(Snappy, ZeroOffsetCopyValidation) {
730 const char* compressed = "\x05\x12\x00\x00";
731 // \x05 Length
732 // \x12\x00\x00 Copy with offset==0, length==5
733 EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4));
734}
735
736int TestFindMatchLength(const char* s1, const char *s2, unsigned length) {
737 uint64_t data;
738 std::pair<size_t, bool> p =
739 snappy::internal::FindMatchLength(s1, s2, s2 + length, &data);
740 CHECK_EQ(p.first < 8, p.second);
741 return p.first;
742}
743
744TEST(Snappy, FindMatchLength) {
745 // Exercise all different code paths through the function.
746 // 64-bit version:
747
748 // Hit s1_limit in 64-bit loop, hit s1_limit in single-character loop.
749 EXPECT_EQ(6, TestFindMatchLength("012345", "012345", 6));
750 EXPECT_EQ(11, TestFindMatchLength("01234567abc", "01234567abc", 11));
751
752 // Hit s1_limit in 64-bit loop, find a non-match in single-character loop.
753 EXPECT_EQ(9, TestFindMatchLength("01234567abc", "01234567axc", 9));
754
755 // Same, but edge cases.
756 EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc!", 11));
757 EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc?", 11));
758
759 // Find non-match at once in first loop.
760 EXPECT_EQ(0, TestFindMatchLength("01234567xxxxxxxx", "?1234567xxxxxxxx", 16));
761 EXPECT_EQ(1, TestFindMatchLength("01234567xxxxxxxx", "0?234567xxxxxxxx", 16));
762 EXPECT_EQ(4, TestFindMatchLength("01234567xxxxxxxx", "01237654xxxxxxxx", 16));
763 EXPECT_EQ(7, TestFindMatchLength("01234567xxxxxxxx", "0123456?xxxxxxxx", 16));
764
765 // Find non-match in first loop after one block.
766 EXPECT_EQ(8, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
767 "abcdefgh?1234567xxxxxxxx", 24));
768 EXPECT_EQ(9, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
769 "abcdefgh0?234567xxxxxxxx", 24));
770 EXPECT_EQ(12, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
771 "abcdefgh01237654xxxxxxxx", 24));
772 EXPECT_EQ(15, TestFindMatchLength("abcdefgh01234567xxxxxxxx",
773 "abcdefgh0123456?xxxxxxxx", 24));
774
775 // 32-bit version:
776
777 // Short matches.
778 EXPECT_EQ(0, TestFindMatchLength("01234567", "?1234567", 8));
779 EXPECT_EQ(1, TestFindMatchLength("01234567", "0?234567", 8));
780 EXPECT_EQ(2, TestFindMatchLength("01234567", "01?34567", 8));
781 EXPECT_EQ(3, TestFindMatchLength("01234567", "012?4567", 8));
782 EXPECT_EQ(4, TestFindMatchLength("01234567", "0123?567", 8));
783 EXPECT_EQ(5, TestFindMatchLength("01234567", "01234?67", 8));
784 EXPECT_EQ(6, TestFindMatchLength("01234567", "012345?7", 8));
785 EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 8));
786 EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 7));
787 EXPECT_EQ(7, TestFindMatchLength("01234567!", "0123456??", 7));
788
789 // Hit s1_limit in 32-bit loop, hit s1_limit in single-character loop.
790 EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd", "xxxxxxabcd", 10));
791 EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd?", "xxxxxxabcd?", 10));
792 EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcdef", "xxxxxxabcdef", 13));
793
794 // Same, but edge cases.
795 EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc!", 12));
796 EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc?", 12));
797
798 // Hit s1_limit in 32-bit loop, find a non-match in single-character loop.
799 EXPECT_EQ(11, TestFindMatchLength("xxxxxx0123abc", "xxxxxx0123axc", 13));
800
801 // Find non-match at once in first loop.
802 EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123xxxxxxxx",
803 "xxxxxx?123xxxxxxxx", 18));
804 EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123xxxxxxxx",
805 "xxxxxx0?23xxxxxxxx", 18));
806 EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123xxxxxxxx",
807 "xxxxxx0132xxxxxxxx", 18));
808 EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123xxxxxxxx",
809 "xxxxxx012?xxxxxxxx", 18));
810
811 // Same, but edge cases.
812 EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123", "xxxxxx?123", 10));
813 EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123", "xxxxxx0?23", 10));
814 EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123", "xxxxxx0132", 10));
815 EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123", "xxxxxx012?", 10));
816
817 // Find non-match in first loop after one block.
818 EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123xx",
819 "xxxxxxabcd?123xx", 16));
820 EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123xx",
821 "xxxxxxabcd0?23xx", 16));
822 EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123xx",
823 "xxxxxxabcd0132xx", 16));
824 EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123xx",
825 "xxxxxxabcd012?xx", 16));
826
827 // Same, but edge cases.
828 EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd?123", 14));
829 EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0?23", 14));
830 EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0132", 14));
831 EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd012?", 14));
832}
833
834TEST(Snappy, FindMatchLengthRandom) {
835 constexpr int kNumTrials = 10000;
836 constexpr int kTypicalLength = 10;
837 std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed));
838 std::uniform_int_distribution<int> uniform_byte(0, 255);
839 std::bernoulli_distribution one_in_two(1.0 / 2);
840 std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength);
841
842 for (int i = 0; i < kNumTrials; ++i) {
843 std::string s, t;
844 char a = static_cast<char>(uniform_byte(rng));
845 char b = static_cast<char>(uniform_byte(rng));
846 while (!one_in_typical_length(rng)) {
847 s.push_back(one_in_two(rng) ? a : b);
848 t.push_back(one_in_two(rng) ? a : b);
849 }
850 DataEndingAtUnreadablePage u(s);
851 DataEndingAtUnreadablePage v(t);
852 size_t matched = TestFindMatchLength(u.data(), v.data(), t.size());
853 if (matched == t.size()) {
854 EXPECT_EQ(s, t);
855 } else {
856 EXPECT_NE(s[matched], t[matched]);
857 for (size_t j = 0; j < matched; ++j) {
858 EXPECT_EQ(s[j], t[j]);
859 }
860 }
861 }
862}
863
864uint16_t MakeEntry(unsigned int extra, unsigned int len,
865 unsigned int copy_offset) {
866 // Check that all of the fields fit within the allocated space
867 assert(extra == (extra & 0x7)); // At most 3 bits
868 assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits
869 assert(len == (len & 0x7f)); // At most 7 bits
870 return len | (copy_offset << 8) | (extra << 11);
871}
872
873// Check that the decompression table is correct, and optionally print out
874// the computed one.
875TEST(Snappy, VerifyCharTable) {
876 using snappy::internal::LITERAL;
877 using snappy::internal::COPY_1_BYTE_OFFSET;
878 using snappy::internal::COPY_2_BYTE_OFFSET;
879 using snappy::internal::COPY_4_BYTE_OFFSET;
880 using snappy::internal::char_table;
881
882 uint16_t dst[256];
883
884 // Place invalid entries in all places to detect missing initialization
885 int assigned = 0;
886 for (int i = 0; i < 256; ++i) {
887 dst[i] = 0xffff;
888 }
889
890 // Small LITERAL entries. We store (len-1) in the top 6 bits.
891 for (uint8_t len = 1; len <= 60; ++len) {
892 dst[LITERAL | ((len - 1) << 2)] = MakeEntry(0, len, 0);
893 assigned++;
894 }
895
896 // Large LITERAL entries. We use 60..63 in the high 6 bits to
897 // encode the number of bytes of length info that follow the opcode.
898 for (uint8_t extra_bytes = 1; extra_bytes <= 4; ++extra_bytes) {
899 // We set the length field in the lookup table to 1 because extra
900 // bytes encode len-1.
901 dst[LITERAL | ((extra_bytes + 59) << 2)] = MakeEntry(extra_bytes, 1, 0);
902 assigned++;
903 }
904
905 // COPY_1_BYTE_OFFSET.
906 //
907 // The tag byte in the compressed data stores len-4 in 3 bits, and
908 // offset/256 in 3 bits. offset%256 is stored in the next byte.
909 //
910 // This format is used for length in range [4..11] and offset in
911 // range [0..2047]
912 for (uint8_t len = 4; len < 12; ++len) {
913 for (uint16_t offset = 0; offset < 2048; offset += 256) {
914 uint8_t offset_high = static_cast<uint8_t>(offset >> 8);
915 dst[COPY_1_BYTE_OFFSET | ((len - 4) << 2) | (offset_high << 5)] =
916 MakeEntry(1, len, offset_high);
917 assigned++;
918 }
919 }
920
921 // COPY_2_BYTE_OFFSET.
922 // Tag contains len-1 in top 6 bits, and offset in next two bytes.
923 for (uint8_t len = 1; len <= 64; ++len) {
924 dst[COPY_2_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(2, len, 0);
925 assigned++;
926 }
927
928 // COPY_4_BYTE_OFFSET.
929 // Tag contents len-1 in top 6 bits, and offset in next four bytes.
930 for (uint8_t len = 1; len <= 64; ++len) {
931 dst[COPY_4_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(4, len, 0);
932 assigned++;
933 }
934
935 // Check that each entry was initialized exactly once.
936 EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256";
937 for (int i = 0; i < 256; ++i) {
938 EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i;
939 }
940
941 if (snappy::GetFlag(FLAGS_snappy_dump_decompression_table)) {
942 std::printf("static const uint16_t char_table[256] = {\n ");
943 for (int i = 0; i < 256; ++i) {
944 std::printf("0x%04x%s",
945 dst[i],
946 ((i == 255) ? "\n" : (((i % 8) == 7) ? ",\n " : ", ")));
947 }
948 std::printf("};\n");
949 }
950
951 // Check that computed table matched recorded table.
952 for (int i = 0; i < 256; ++i) {
953 EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i;
954 }
955}
956
957TEST(Snappy, TestBenchmarkFiles) {
958 for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) {
959 Verify(ReadTestDataFile(kTestDataFiles[i].filename,
960 kTestDataFiles[i].size_limit));
961 }
962}
963
964} // namespace
965
966} // namespace snappy