Squashed 'third_party/snappy/' content from commit fe18b4632

Change-Id: Ifce86fec16230732a761515b70624a35f3095aea
git-subtree-dir: third_party/snappy
git-subtree-split: fe18b46322727fe4bc6939326ba6bde1e0858b61
Signed-off-by: James Kuszmaul <jabukuszmaul+collab@gmail.com>
diff --git a/snappy_test_tool.cc b/snappy_test_tool.cc
new file mode 100644
index 0000000..a7c779b
--- /dev/null
+++ b/snappy_test_tool.cc
@@ -0,0 +1,471 @@
+// Copyright 2020 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <cstdlib>
+#include <random>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "snappy-test.h"
+
+#include "snappy-internal.h"
+#include "snappy-sinksource.h"
+#include "snappy.h"
+#include "snappy_test_data.h"
+
+SNAPPY_FLAG(int32_t, start_len, -1,
+            "Starting prefix size for testing (-1: just full file contents)");
+SNAPPY_FLAG(int32_t, end_len, -1,
+            "Ending prefix size for testing (-1: just full file contents)");
+SNAPPY_FLAG(int32_t, bytes, 10485760,
+            "How many bytes to compress/uncompress per file for timing");
+
+SNAPPY_FLAG(bool, zlib, true,
+            "Run zlib compression (http://www.zlib.net)");
+SNAPPY_FLAG(bool, lzo, true,
+            "Run LZO compression (http://www.oberhumer.com/opensource/lzo/)");
+SNAPPY_FLAG(bool, lz4, true,
+            "Run LZ4 compression (https://github.com/lz4/lz4)");
+SNAPPY_FLAG(bool, snappy, true, "Run snappy compression");
+
+SNAPPY_FLAG(bool, write_compressed, false,
+            "Write compressed versions of each file to <file>.comp");
+SNAPPY_FLAG(bool, write_uncompressed, false,
+            "Write uncompressed versions of each file to <file>.uncomp");
+
+namespace snappy {
+
+namespace {
+
+#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
+
+// To test against code that reads beyond its input, this class copies a
+// string to a newly allocated group of pages, the last of which is made
+// unreadable via mprotect. The memory comes from mmap(), as POSIX allows
+// mprotect() only on mmap()ed memory, and some malloc/posix_memalign
+// implementations expect to be able to read previously allocated memory
+// while doing heap allocations. Not copyable: it owns the mapping.
+class DataEndingAtUnreadablePage {
+ public:
+  explicit DataEndingAtUnreadablePage(const std::string& s)
+      : page_size_(sysconf(_SC_PAGESIZE)), size_(s.size()) {
+    // Round up space for string to a multiple of the page size.
+    const size_t string_space = (size_ + page_size_ - 1) & ~(page_size_ - 1);
+    alloc_size_ = string_space + page_size_;
+    mem_ = mmap(nullptr, alloc_size_,
+                PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    CHECK_NE(MAP_FAILED, mem_);
+    protected_page_ = reinterpret_cast<char*>(mem_) + string_space;
+    data_ = protected_page_ - size_;
+    std::memcpy(protected_page_ - size_, s.data(), size_);
+    // Make guard page unreadable.
+    CHECK_EQ(0, mprotect(protected_page_, page_size_, PROT_NONE));
+  }
+
+  DataEndingAtUnreadablePage(const DataEndingAtUnreadablePage&) = delete;
+  void operator=(const DataEndingAtUnreadablePage&) = delete;
+
+  ~DataEndingAtUnreadablePage() {
+    // Undo the mprotect.
+    CHECK_EQ(0, mprotect(protected_page_, page_size_, PROT_READ|PROT_WRITE));
+    CHECK_EQ(0, munmap(mem_, alloc_size_));
+  }
+
+  const char* data() const { return data_; }
+  size_t size() const { return size_; }
+
+ private:
+  size_t page_size_;
+  size_t alloc_size_;
+  void* mem_;
+  char* protected_page_;
+  const char* data_;
+  size_t size_;
+};
+
+#else  // HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
+
+// Fallback for systems without mmap.
+using DataEndingAtUnreadablePage = std::string;
+
+#endif
+
+enum CompressorType { ZLIB, LZO, LZ4, SNAPPY };
+// Printable names, indexed by CompressorType; keep in the enum's order.
+const char* const names[] = {"ZLIB", "LZO", "LZ4", "SNAPPY"};
+
+size_t MinimumRequiredOutputSpace(size_t input_size, CompressorType comp) {
+  // Worst-case output size for comp; 0 if its library is not compiled in.
+#ifdef ZLIB_VERSION
+  if (comp == ZLIB) return ZLib::MinCompressbufSize(input_size);
+#endif  // ZLIB_VERSION
+#ifdef LZO_VERSION
+  if (comp == LZO) return input_size + input_size/64 + 16 + 3;
+#endif  // LZO_VERSION
+#ifdef LZ4_VERSION_NUMBER
+  if (comp == LZ4) return LZ4_compressBound(input_size);
+#endif  // LZ4_VERSION_NUMBER
+
+  switch (comp) {
+    case SNAPPY:
+      return snappy::MaxCompressedLength(input_size);
+    case ZLIB:
+    case LZO:
+    case LZ4:
+      // Library not compiled in; Compress() reports that by returning false.
+      return 0;
+    default:
+      LOG(FATAL) << "Unknown compression type number " << comp;
+      return 0;
+  }
+}
+
+// Compresses input[0, input_size) with comp into *compressed. Returns true on
+// success, or false when comp's library was not compiled in.
+//
+// If compressed_is_preallocated is set, *compressed is never resized. This
+// is typically what you want for a benchmark, to keep the memory allocator
+// out of the timings; "compressed" must then already hold at least
+// MinimumRequiredOutputSpace(input_size, comp) bytes, and may end in junk.
+bool Compress(const char* input, size_t input_size, CompressorType comp,
+              std::string* compressed, bool compressed_is_preallocated) {
+  if (!compressed_is_preallocated) {
+    compressed->resize(MinimumRequiredOutputSpace(input_size, comp));
+  }
+
+  switch (comp) {
+#ifdef ZLIB_VERSION
+    case ZLIB: {
+      ZLib zlib;
+      uLongf destlen = compressed->size();
+      int ret = zlib.Compress(
+          reinterpret_cast<Bytef*>(string_as_array(compressed)),
+          &destlen,
+          reinterpret_cast<const Bytef*>(input),
+          input_size);
+      CHECK_EQ(Z_OK, ret);
+      if (!compressed_is_preallocated) {
+        compressed->resize(destlen);
+      }
+      return true;
+    }
+#endif  // ZLIB_VERSION
+
+#ifdef LZO_VERSION
+    case LZO: {
+      unsigned char* mem = new unsigned char[LZO1X_1_15_MEM_COMPRESS];
+      lzo_uint destlen;
+      int ret = lzo1x_1_15_compress(
+          reinterpret_cast<const uint8_t*>(input),
+          input_size,
+          reinterpret_cast<uint8_t*>(string_as_array(compressed)),
+          &destlen,
+          mem);
+      CHECK_EQ(LZO_E_OK, ret);
+      delete[] mem;
+      if (!compressed_is_preallocated) {
+        compressed->resize(destlen);
+      }
+      break;
+    }
+#endif  // LZO_VERSION
+
+#ifdef LZ4_VERSION_NUMBER
+    case LZ4: {
+      int destlen = compressed->size();
+      destlen = LZ4_compress_default(input, string_as_array(compressed),
+                                     input_size, destlen);
+      CHECK_NE(destlen, 0);
+      if (!compressed_is_preallocated) {
+        compressed->resize(destlen);
+      }
+      break;
+    }
+#endif  // LZ4_VERSION_NUMBER
+
+    case SNAPPY: {
+      size_t destlen;
+      snappy::RawCompress(input, input_size,
+                          string_as_array(compressed),
+                          &destlen);
+      CHECK_LE(destlen, snappy::MaxCompressedLength(input_size));
+      if (!compressed_is_preallocated) {
+        compressed->resize(destlen);
+      }
+      break;
+    }
+
+    default: {
+      return false;     // the asked-for library wasn't compiled in
+    }
+  }
+  return true;
+}
+
+bool Uncompress(const std::string& compressed, CompressorType comp, int size,
+                std::string* output) {
+  // Decompresses "compressed" into *output, resized to "size" bytes first.
+  // Returns false if comp is not compiled in; corrupt input fails a CHECK.
+  switch (comp) {
+#ifdef ZLIB_VERSION
+    case ZLIB: {
+      output->resize(size);
+      ZLib zlib;
+      uLongf destlen = output->size();
+      int ret = zlib.Uncompress(
+          reinterpret_cast<Bytef*>(string_as_array(output)),
+          &destlen,
+          reinterpret_cast<const Bytef*>(compressed.data()),
+          compressed.size());
+      CHECK_EQ(Z_OK, ret);
+      CHECK_EQ(static_cast<uLongf>(size), destlen);
+      break;
+    }
+#endif  // ZLIB_VERSION
+
+#ifdef LZO_VERSION
+    case LZO: {
+      output->resize(size);
+      lzo_uint destlen;
+      int ret = lzo1x_decompress(
+          reinterpret_cast<const uint8_t*>(compressed.data()),
+          compressed.size(),
+          reinterpret_cast<uint8_t*>(string_as_array(output)),
+          &destlen,
+          NULL);
+      CHECK_EQ(LZO_E_OK, ret);
+      CHECK_EQ(static_cast<lzo_uint>(size), destlen);
+      break;
+    }
+#endif  // LZO_VERSION
+
+#ifdef LZ4_VERSION_NUMBER
+    case LZ4: {
+      output->resize(size);
+      int destlen = output->size();
+      destlen = LZ4_decompress_safe(compressed.data(), string_as_array(output),
+                                    compressed.size(), destlen);
+      // Failures are negative, so the size check below also catches them.
+      CHECK_EQ(size, destlen);
+      break;
+    }
+#endif  // LZ4_VERSION_NUMBER
+    case SNAPPY: {
+      output->resize(size);
+      CHECK(snappy::RawUncompress(compressed.data(), compressed.size(),
+                                  string_as_array(output)));
+      break;
+    }
+    default: {
+      return false;     // the asked-for library wasn't compiled in
+    }
+  }
+  return true;
+}
+
+void Measure(const char* data, size_t length, CompressorType comp, int repeats,
+             int block_size) {
+  // Benchmarks comp on data[0, length): the input is cut into block_size-byte
+  // blocks, each handled "repeats" times per run; the median run is printed.
+  static const int kRuns = 5;
+  double ctime[kRuns];
+  double utime[kRuns];
+  int compressed_size = 0;
+
+  {
+    // Chop the input into blocks; empty input still yields one empty block.
+    int num_blocks = std::max<int>(1, (length + block_size - 1) / block_size);
+    std::vector<const char*> input(num_blocks);
+    std::vector<size_t> input_length(num_blocks);
+    std::vector<std::string> compressed(num_blocks);
+    std::vector<std::string> output(num_blocks);
+    for (int b = 0; b < num_blocks; ++b) {
+      int input_start = b * block_size;
+      int input_limit = std::min<int>((b+1)*block_size, length);
+      input[b] = data+input_start;
+      input_length[b] = input_limit-input_start;
+    }
+
+    // Pre-grow the output buffers so we don't measure string append time.
+    for (std::string& compressed_block : compressed) {
+      compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp));
+    }
+
+    // First, try one trial compression to make sure the code is compiled in
+    if (!Compress(input[0], input_length[0], comp, &compressed[0], true)) {
+      LOG(WARNING) << "Skipping " << names[comp] << ": "
+                   << "library not compiled in";
+      return;
+    }
+
+    for (int run = 0; run < kRuns; ++run) {
+      CycleTimer ctimer, utimer;
+
+      // Pre-grow the output buffers so we don't measure string append time.
+      for (std::string& compressed_block : compressed) {
+        compressed_block.resize(MinimumRequiredOutputSpace(block_size, comp));
+      }
+
+      ctimer.Start();
+      for (int b = 0; b < num_blocks; ++b) {
+        for (int i = 0; i < repeats; ++i)
+          Compress(input[b], input_length[b], comp, &compressed[b], true);
+      }
+      ctimer.Stop();
+
+      // Compress once more, with resizing, so we don't leave junk
+      // at the end that will confuse the decompressor.
+      for (int b = 0; b < num_blocks; ++b) {
+        Compress(input[b], input_length[b], comp, &compressed[b], false);
+      }
+
+      for (int b = 0; b < num_blocks; ++b) {
+        output[b].resize(input_length[b]);
+      }
+
+      utimer.Start();
+      for (int i = 0; i < repeats; ++i) {
+        for (int b = 0; b < num_blocks; ++b)
+          Uncompress(compressed[b], comp, input_length[b], &output[b]);
+      }
+      utimer.Stop();
+
+      ctime[run] = ctimer.Get();
+      utime[run] = utimer.Get();
+    }
+
+    for (const std::string& compressed_item : compressed) {
+      compressed_size += compressed_item.size();
+    }
+  }
+
+  std::sort(ctime, ctime + kRuns);
+  std::sort(utime, utime + kRuns);
+  const int med = kRuns/2;
+
+  float comp_rate = (length / ctime[med]) * repeats / 1048576.0;
+  float uncomp_rate = (length / utime[med]) * repeats / 1048576.0;
+  std::string x = names[comp];
+  x += ":";
+  std::string urate = (uncomp_rate >= 0) ? StrFormat("%.1f", uncomp_rate)
+                                         : std::string("?");
+  std::printf("%-7s [b %dM] bytes %6d -> %6d %4.1f%%  "
+              "comp %5.1f MB/s  uncomp %5s MB/s\n",
+              x.c_str(),
+              block_size/(1<<20),
+              static_cast<int>(length), static_cast<uint32_t>(compressed_size),
+              (compressed_size * 100.0) / std::max<int>(1, length),
+              comp_rate,
+              urate.c_str());
+}
+
+void CompressFile(const char* fname) {
+  // Writes the snappy-compressed contents of fname to "<fname>.comp".
+  std::string contents;
+  CHECK_OK(file::GetContents(fname, &contents, file::Defaults()));
+  std::string packed;
+  Compress(contents.data(), contents.size(), SNAPPY, &packed, false);
+
+  std::string out_name = std::string(fname) + ".comp";
+  CHECK_OK(file::SetContents(out_name, packed, file::Defaults()));
+}
+
+void UncompressFile(const char* fname) {
+  // Reads snappy data from fname and writes the decoded bytes to
+  // "<fname>.uncomp". Input that snappy cannot decode fails a CHECK.
+  std::string packed;
+  CHECK_OK(file::GetContents(fname, &packed, file::Defaults()));
+
+  size_t decoded_size;
+  CHECK(snappy::GetUncompressedLength(packed.data(), packed.size(),
+                                      &decoded_size));
+  std::string decoded(decoded_size, '\0');
+  CHECK(snappy::Uncompress(packed.data(), packed.size(), &decoded));
+
+  std::string out_name = std::string(fname) + ".uncomp";
+  CHECK_OK(file::SetContents(out_name, decoded, file::Defaults()));
+}
+
+void MeasureFile(const char* fname) {
+  // Benchmarks every enabled compressor on each prefix of fname's contents
+  // whose length lies in [start_len, end_len] (whole file if a flag is < 0).
+  std::string contents;
+  CHECK_OK(file::GetContents(fname, &contents, file::Defaults()));
+  std::printf("%-40s :\n", fname);
+
+  const int file_size = contents.size();
+  const int first_flag = snappy::GetFlag(FLAGS_start_len);
+  const int last_flag = snappy::GetFlag(FLAGS_end_len);
+  const int first_len = (first_flag < 0) ? file_size : first_flag;
+  const int last_len =
+      (last_flag < 0) ? file_size : std::min<int>(file_size, last_flag);
+
+  const char* const input = contents.data();
+  for (int len = first_len; len <= last_len; ++len) {
+    // Enough repetitions to process roughly FLAGS_bytes bytes in total.
+    const int repeats = (snappy::GetFlag(FLAGS_bytes) + len) / (len + 1);
+    if (snappy::GetFlag(FLAGS_zlib))
+      Measure(input, len, ZLIB, repeats, 1024 << 10);
+    if (snappy::GetFlag(FLAGS_lzo))
+      Measure(input, len, LZO, repeats, 1024 << 10);
+    if (snappy::GetFlag(FLAGS_lz4))
+      Measure(input, len, LZ4, repeats, 1024 << 10);
+    if (snappy::GetFlag(FLAGS_snappy))
+      Measure(input, len, SNAPPY, repeats, 4096 << 10);
+
+    // For block-size based measurements
+    if (0 && snappy::GetFlag(FLAGS_snappy)) {
+      for (const int kib : {8, 16, 32, 64, 256, 1024}) {
+        Measure(input, len, SNAPPY, repeats, kib << 10);
+      }
+    }
+  }
+}
+
+}  // namespace
+
+}  // namespace snappy
+
+int main(int argc, char** argv) {
+  InitGoogle(argv[0], &argc, &argv, true);
+  // Every remaining argument names a file to convert or to benchmark.
+  for (char** path = argv + 1; path < argv + argc; ++path) {
+    if (snappy::GetFlag(FLAGS_write_compressed)) {
+      snappy::CompressFile(*path);
+    } else if (snappy::GetFlag(FLAGS_write_uncompressed)) {
+      snappy::UncompressFile(*path);
+    } else {
+      snappy::MeasureFile(*path);
+    }
+  }
+  return 0;
+}