Austin Schuh | 36244a1 | 2019-09-21 17:52:38 -0700 | [diff] [blame^] | 1 | // Copyright 2017 The Abseil Authors. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | // Benchmarks for absl random distributions as well as a selection of the |
| 16 | // C++ standard library random distributions. |
| 17 | |
| 18 | #include <algorithm> |
| 19 | #include <cstddef> |
| 20 | #include <cstdint> |
| 21 | #include <initializer_list> |
| 22 | #include <iterator> |
| 23 | #include <limits> |
| 24 | #include <random> |
| 25 | #include <type_traits> |
| 26 | #include <vector> |
| 27 | |
| 28 | #include "absl/base/macros.h" |
| 29 | #include "absl/meta/type_traits.h" |
| 30 | #include "absl/random/bernoulli_distribution.h" |
| 31 | #include "absl/random/beta_distribution.h" |
| 32 | #include "absl/random/exponential_distribution.h" |
| 33 | #include "absl/random/gaussian_distribution.h" |
| 34 | #include "absl/random/internal/fast_uniform_bits.h" |
| 35 | #include "absl/random/internal/randen_engine.h" |
| 36 | #include "absl/random/log_uniform_int_distribution.h" |
| 37 | #include "absl/random/poisson_distribution.h" |
| 38 | #include "absl/random/random.h" |
| 39 | #include "absl/random/uniform_int_distribution.h" |
| 40 | #include "absl/random/uniform_real_distribution.h" |
| 41 | #include "absl/random/zipf_distribution.h" |
| 42 | #include "benchmark/benchmark.h" |
| 43 | |
| 44 | namespace { |
| 45 | |
// Seed data to avoid reading random_device() for benchmarks.
//
// The table is only ever read (by PrecompiledSeedSeq and make_engine), so it
// is declared constexpr: it cannot be modified by accident and can be shared
// freely between benchmark threads.
constexpr uint32_t kSeedData[] = {
    0x1B510052, 0x9A532915, 0xD60F573F, 0xBC9BC6E4, 0x2B60A476, 0x81E67400,
    0x08BA6FB5, 0x571BE91F, 0xF296EC6B, 0x2A0DD915, 0xB6636521, 0xE7B9F9B6,
    0xFF34052E, 0xC5855664, 0x53B02D5D, 0xA99F8FA1, 0x08BA4799, 0x6E85076A,
    0x4B7A70E9, 0xB5B32944, 0xDB75092E, 0xC4192623, 0xAD6EA6B0, 0x49A7DF7D,
    0x9CEE60B8, 0x8FEDB266, 0xECAA8C71, 0x699A18FF, 0x5664526C, 0xC2B19EE1,
    0x193602A5, 0x75094C29, 0xA0591340, 0xE4183A3E, 0x3F54989A, 0x5B429D65,
    0x6B8FE4D6, 0x99F73FD6, 0xA1D29C07, 0xEFE830F5, 0x4D2D38E6, 0xF0255DC1,
    0x4CDD2086, 0x8470EB26, 0x6382E9C6, 0x021ECC5E, 0x09686B3F, 0x3EBAEFC9,
    0x3C971814, 0x6B6A70A1, 0x687F3584, 0x52A0E286, 0x13198A2E, 0x03707344,
};
| 58 | |
| 59 | // PrecompiledSeedSeq provides kSeedData to a conforming |
| 60 | // random engine to speed initialization in the benchmarks. |
| 61 | class PrecompiledSeedSeq { |
| 62 | public: |
| 63 | using result_type = uint32_t; |
| 64 | |
| 65 | PrecompiledSeedSeq() {} |
| 66 | |
| 67 | template <typename Iterator> |
| 68 | PrecompiledSeedSeq(Iterator begin, Iterator end) {} |
| 69 | |
| 70 | template <typename T> |
| 71 | PrecompiledSeedSeq(std::initializer_list<T> il) {} |
| 72 | |
| 73 | template <typename OutIterator> |
| 74 | void generate(OutIterator begin, OutIterator end) { |
| 75 | static size_t idx = 0; |
| 76 | for (; begin != end; begin++) { |
| 77 | *begin = kSeedData[idx++]; |
| 78 | if (idx >= ABSL_ARRAYSIZE(kSeedData)) { |
| 79 | idx = 0; |
| 80 | } |
| 81 | } |
| 82 | } |
| 83 | |
| 84 | size_t size() const { return ABSL_ARRAYSIZE(kSeedData); } |
| 85 | |
| 86 | template <typename OutIterator> |
| 87 | void param(OutIterator out) const { |
| 88 | std::copy(std::begin(kSeedData), std::end(kSeedData), out); |
| 89 | } |
| 90 | }; |
| 91 | |
// use_default_initialization<T>::value tells make_engine whether the random
// engine T has to be default constructed (true) or may be seeded from a seed
// sequence (false). The trait exists because some engines do not accept seed
// sequence-based initialization. As defined here it is false for every T.
template <typename Engine>
using use_default_initialization = std::integral_constant<bool, false>;
| 98 | |
| 99 | // make_engine<T, SSeq> returns a random_engine which is initialized, |
| 100 | // either via the default constructor, when use_default_initialization<T> |
| 101 | // is true, or via the indicated seed sequence, SSeq. |
| 102 | template <typename Engine, typename SSeq = PrecompiledSeedSeq> |
| 103 | typename absl::enable_if_t<!use_default_initialization<Engine>::value, Engine> |
| 104 | make_engine() { |
| 105 | // Initialize the random engine using the seed sequence SSeq, which |
| 106 | // is constructed from the precompiled seed data. |
| 107 | SSeq seq(std::begin(kSeedData), std::end(kSeedData)); |
| 108 | return Engine(seq); |
| 109 | } |
| 110 | |
| 111 | template <typename Engine, typename SSeq = PrecompiledSeedSeq> |
| 112 | typename absl::enable_if_t<use_default_initialization<Engine>::value, Engine> |
| 113 | make_engine() { |
| 114 | // Initialize the random engine using the default constructor. |
| 115 | return Engine(); |
| 116 | } |
| 117 | |
| 118 | template <typename Engine, typename SSeq> |
| 119 | void BM_Construct(benchmark::State& state) { |
| 120 | for (auto _ : state) { |
| 121 | auto rng = make_engine<Engine, SSeq>(); |
| 122 | benchmark::DoNotOptimize(rng()); |
| 123 | } |
| 124 | } |
| 125 | |
| 126 | template <typename Engine> |
| 127 | void BM_Direct(benchmark::State& state) { |
| 128 | using value_type = typename Engine::result_type; |
| 129 | // Direct use of the URBG. |
| 130 | auto rng = make_engine<Engine>(); |
| 131 | for (auto _ : state) { |
| 132 | benchmark::DoNotOptimize(rng()); |
| 133 | } |
| 134 | state.SetBytesProcessed(sizeof(value_type) * state.iterations()); |
| 135 | } |
| 136 | |
| 137 | template <typename Engine> |
| 138 | void BM_Generate(benchmark::State& state) { |
| 139 | // std::generate makes a copy of the RNG; thus this tests the |
| 140 | // copy-constructor efficiency. |
| 141 | using value_type = typename Engine::result_type; |
| 142 | std::vector<value_type> v(64); |
| 143 | auto rng = make_engine<Engine>(); |
| 144 | while (state.KeepRunningBatch(64)) { |
| 145 | std::generate(std::begin(v), std::end(v), rng); |
| 146 | } |
| 147 | } |
| 148 | |
| 149 | template <typename Engine, size_t elems> |
| 150 | void BM_Shuffle(benchmark::State& state) { |
| 151 | // Direct use of the Engine. |
| 152 | std::vector<uint32_t> v(elems); |
| 153 | while (state.KeepRunningBatch(elems)) { |
| 154 | auto rng = make_engine<Engine>(); |
| 155 | std::shuffle(std::begin(v), std::end(v), rng); |
| 156 | } |
| 157 | } |
| 158 | |
| 159 | template <typename Engine, size_t elems> |
| 160 | void BM_ShuffleReuse(benchmark::State& state) { |
| 161 | // Direct use of the Engine. |
| 162 | std::vector<uint32_t> v(elems); |
| 163 | auto rng = make_engine<Engine>(); |
| 164 | while (state.KeepRunningBatch(elems)) { |
| 165 | std::shuffle(std::begin(v), std::end(v), rng); |
| 166 | } |
| 167 | } |
| 168 | |
| 169 | template <typename Engine, typename Dist, typename... Args> |
| 170 | void BM_Dist(benchmark::State& state, Args&&... args) { |
| 171 | using value_type = typename Dist::result_type; |
| 172 | auto rng = make_engine<Engine>(); |
| 173 | Dist dis{std::forward<Args>(args)...}; |
| 174 | // Compare the following loop performance: |
| 175 | for (auto _ : state) { |
| 176 | benchmark::DoNotOptimize(dis(rng)); |
| 177 | } |
| 178 | state.SetBytesProcessed(sizeof(value_type) * state.iterations()); |
| 179 | } |
| 180 | |
| 181 | template <typename Engine, typename Dist> |
| 182 | void BM_Large(benchmark::State& state) { |
| 183 | using value_type = typename Dist::result_type; |
| 184 | volatile value_type kMin = 0; |
| 185 | volatile value_type kMax = std::numeric_limits<value_type>::max() / 2 + 1; |
| 186 | BM_Dist<Engine, Dist>(state, kMin, kMax); |
| 187 | } |
| 188 | |
| 189 | template <typename Engine, typename Dist> |
| 190 | void BM_Small(benchmark::State& state) { |
| 191 | using value_type = typename Dist::result_type; |
| 192 | volatile value_type kMin = 0; |
| 193 | volatile value_type kMax = std::numeric_limits<value_type>::max() / 64 + 1; |
| 194 | BM_Dist<Engine, Dist>(state, kMin, kMax); |
| 195 | } |
| 196 | |
| 197 | template <typename Engine, typename Dist, int A> |
| 198 | void BM_Bernoulli(benchmark::State& state) { |
| 199 | volatile double a = static_cast<double>(A) / 1000000; |
| 200 | BM_Dist<Engine, Dist>(state, a); |
| 201 | } |
| 202 | |
| 203 | template <typename Engine, typename Dist, int A, int B> |
| 204 | void BM_Beta(benchmark::State& state) { |
| 205 | using value_type = typename Dist::result_type; |
| 206 | volatile value_type a = static_cast<value_type>(A) / 100; |
| 207 | volatile value_type b = static_cast<value_type>(B) / 100; |
| 208 | BM_Dist<Engine, Dist>(state, a, b); |
| 209 | } |
| 210 | |
| 211 | template <typename Engine, typename Dist, int A> |
| 212 | void BM_Gamma(benchmark::State& state) { |
| 213 | using value_type = typename Dist::result_type; |
| 214 | volatile value_type a = static_cast<value_type>(A) / 100; |
| 215 | BM_Dist<Engine, Dist>(state, a); |
| 216 | } |
| 217 | |
| 218 | template <typename Engine, typename Dist, int A = 100> |
| 219 | void BM_Poisson(benchmark::State& state) { |
| 220 | volatile double a = static_cast<double>(A) / 100; |
| 221 | BM_Dist<Engine, Dist>(state, a); |
| 222 | } |
| 223 | |
| 224 | template <typename Engine, typename Dist, int Q = 2, int V = 1> |
| 225 | void BM_Zipf(benchmark::State& state) { |
| 226 | using value_type = typename Dist::result_type; |
| 227 | volatile double q = Q; |
| 228 | volatile double v = V; |
| 229 | BM_Dist<Engine, Dist>(state, std::numeric_limits<value_type>::max(), q, v); |
| 230 | } |
| 231 | |
| 232 | template <typename Engine, typename Dist> |
| 233 | void BM_Thread(benchmark::State& state) { |
| 234 | using value_type = typename Dist::result_type; |
| 235 | auto rng = make_engine<Engine>(); |
| 236 | Dist dis{}; |
| 237 | for (auto _ : state) { |
| 238 | benchmark::DoNotOptimize(dis(rng)); |
| 239 | } |
| 240 | state.SetBytesProcessed(sizeof(value_type) * state.iterations()); |
| 241 | } |
| 242 | |
| 243 | // NOTES: |
| 244 | // |
| 245 | // std::geometric_distribution is similar to the zipf distributions. |
| 246 | // The algorithm for the geometric_distribution is, basically, |
| 247 | // floor(log(1-X) / log(1-p)) |
| 248 | |
// Normal benchmark suite: engine construction, direct output, shuffling, and
// the std:: / absl:: uniform distributions. The expansion has no trailing
// semicolon; invoke as `BM_BASIC(Engine);`.
#define BM_BASIC(Engine) \
  /* -------------- Construction and direct use ------------*/ \
  BENCHMARK_TEMPLATE(BM_Construct, Engine, PrecompiledSeedSeq); \
  BENCHMARK_TEMPLATE(BM_Construct, Engine, std::seed_seq); \
  BENCHMARK_TEMPLATE(BM_Direct, Engine); \
  /* -------------- Shuffle --------------------------------*/ \
  BENCHMARK_TEMPLATE(BM_Shuffle, Engine, 10); \
  BENCHMARK_TEMPLATE(BM_Shuffle, Engine, 100); \
  BENCHMARK_TEMPLATE(BM_Shuffle, Engine, 1000); \
  BENCHMARK_TEMPLATE(BM_ShuffleReuse, Engine, 100); \
  BENCHMARK_TEMPLATE(BM_ShuffleReuse, Engine, 1000); \
  /* -------------- Uniform bits / integers ----------------*/ \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, \
                     absl::random_internal::FastUniformBits<uint32_t>); \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, \
                     absl::random_internal::FastUniformBits<uint64_t>); \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, \
                     std::uniform_int_distribution<int32_t>); \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, \
                     std::uniform_int_distribution<int64_t>); \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, \
                     absl::uniform_int_distribution<int32_t>); \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, \
                     absl::uniform_int_distribution<int64_t>); \
  BENCHMARK_TEMPLATE(BM_Large, Engine, \
                     std::uniform_int_distribution<int32_t>); \
  BENCHMARK_TEMPLATE(BM_Large, Engine, \
                     std::uniform_int_distribution<int64_t>); \
  BENCHMARK_TEMPLATE(BM_Large, Engine, \
                     absl::uniform_int_distribution<int32_t>); \
  BENCHMARK_TEMPLATE(BM_Large, Engine, \
                     absl::uniform_int_distribution<int64_t>); \
  /* -------------- Uniform reals --------------------------*/ \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, \
                     std::uniform_real_distribution<float>); \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, \
                     std::uniform_real_distribution<double>); \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, \
                     absl::uniform_real_distribution<float>); \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, \
                     absl::uniform_real_distribution<double>)
| 281 | |
// Registers BM_Generate for Engine. The expansion has no trailing semicolon;
// invoke as `BM_COPY(Engine);`.
#define BM_COPY(Engine) \
  BENCHMARK_TEMPLATE(BM_Generate, Engine)
| 283 | |
// Multi-threaded suite: registers the distribution and shuffle benchmarks for
// Engine with ThreadPerCpu().
//
// Like BM_BASIC and BM_COPY, the expansion deliberately has no trailing
// semicolon; the caller supplies it (`BM_THREAD(Engine);`). A semicolon inside
// the macro would leave a stray empty declaration (`;;`) at every use.
#define BM_THREAD(Engine) \
  BENCHMARK_TEMPLATE(BM_Thread, Engine, \
                     absl::uniform_int_distribution<int64_t>) \
      ->ThreadPerCpu(); \
  BENCHMARK_TEMPLATE(BM_Thread, Engine, \
                     absl::uniform_real_distribution<double>) \
      ->ThreadPerCpu(); \
  BENCHMARK_TEMPLATE(BM_Shuffle, Engine, 100)->ThreadPerCpu(); \
  BENCHMARK_TEMPLATE(BM_Shuffle, Engine, 1000)->ThreadPerCpu(); \
  BENCHMARK_TEMPLATE(BM_ShuffleReuse, Engine, 100)->ThreadPerCpu(); \
  BENCHMARK_TEMPLATE(BM_ShuffleReuse, Engine, 1000)->ThreadPerCpu()
| 295 | |
// Extended suite: small-range uniform distributions plus the remaining std::
// and absl:: distributions (normal/gaussian, exponential, poisson,
// log-uniform, geometric, zipf, bernoulli, beta, gamma).
//
// Like BM_BASIC and BM_COPY, the expansion deliberately has no trailing
// semicolon; the caller supplies it (`BM_EXTENDED(Engine);`). A semicolon
// inside the macro would leave a stray empty declaration (`;;`) at every use.
#define BM_EXTENDED(Engine) \
  /* -------------- Extended Uniform -----------------------*/ \
  BENCHMARK_TEMPLATE(BM_Small, Engine, \
                     std::uniform_int_distribution<int32_t>); \
  BENCHMARK_TEMPLATE(BM_Small, Engine, \
                     std::uniform_int_distribution<int64_t>); \
  BENCHMARK_TEMPLATE(BM_Small, Engine, \
                     absl::uniform_int_distribution<int32_t>); \
  BENCHMARK_TEMPLATE(BM_Small, Engine, \
                     absl::uniform_int_distribution<int64_t>); \
  BENCHMARK_TEMPLATE(BM_Small, Engine, std::uniform_real_distribution<float>); \
  BENCHMARK_TEMPLATE(BM_Small, Engine, \
                     std::uniform_real_distribution<double>); \
  BENCHMARK_TEMPLATE(BM_Small, Engine, \
                     absl::uniform_real_distribution<float>); \
  BENCHMARK_TEMPLATE(BM_Small, Engine, \
                     absl::uniform_real_distribution<double>); \
  /* -------------- Other -----------------------*/ \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, std::normal_distribution<double>); \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, absl::gaussian_distribution<double>); \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, std::exponential_distribution<double>); \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, absl::exponential_distribution<double>); \
  BENCHMARK_TEMPLATE(BM_Poisson, Engine, std::poisson_distribution<int64_t>, \
                     100); \
  BENCHMARK_TEMPLATE(BM_Poisson, Engine, absl::poisson_distribution<int64_t>, \
                     100); \
  BENCHMARK_TEMPLATE(BM_Poisson, Engine, std::poisson_distribution<int64_t>, \
                     10 * 100); \
  BENCHMARK_TEMPLATE(BM_Poisson, Engine, absl::poisson_distribution<int64_t>, \
                     10 * 100); \
  BENCHMARK_TEMPLATE(BM_Poisson, Engine, std::poisson_distribution<int64_t>, \
                     13 * 100); \
  BENCHMARK_TEMPLATE(BM_Poisson, Engine, absl::poisson_distribution<int64_t>, \
                     13 * 100); \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, \
                     absl::log_uniform_int_distribution<int32_t>); \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, \
                     absl::log_uniform_int_distribution<int64_t>); \
  BENCHMARK_TEMPLATE(BM_Dist, Engine, std::geometric_distribution<int64_t>); \
  BENCHMARK_TEMPLATE(BM_Zipf, Engine, absl::zipf_distribution<uint64_t>); \
  BENCHMARK_TEMPLATE(BM_Zipf, Engine, absl::zipf_distribution<uint64_t>, 2, \
                     3); \
  BENCHMARK_TEMPLATE(BM_Bernoulli, Engine, std::bernoulli_distribution, \
                     257305); \
  BENCHMARK_TEMPLATE(BM_Bernoulli, Engine, absl::bernoulli_distribution, \
                     257305); \
  BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<double>, 65, \
                     41); \
  BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<double>, 99, \
                     330); \
  BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<double>, 150, \
                     150); \
  BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<double>, 410, \
                     580); \
  BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<float>, 65, 41); \
  BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<float>, 99, \
                     330); \
  BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<float>, 150, \
                     150); \
  BENCHMARK_TEMPLATE(BM_Beta, Engine, absl::beta_distribution<float>, 410, \
                     580); \
  BENCHMARK_TEMPLATE(BM_Gamma, Engine, std::gamma_distribution<float>, 199); \
  BENCHMARK_TEMPLATE(BM_Gamma, Engine, std::gamma_distribution<double>, 199)
| 359 | |
| 360 | // ABSL Recommended interfaces. |
| 361 | BM_BASIC(absl::InsecureBitGen); // === pcg64_2018_engine |
| 362 | BM_BASIC(absl::BitGen); // === randen_engine<uint64_t>. |
| 363 | BM_THREAD(absl::BitGen); |
| 364 | BM_EXTENDED(absl::BitGen); |
| 365 | |
| 366 | // Instantiate benchmarks for multiple engines. |
| 367 | using randen_engine_64 = absl::random_internal::randen_engine<uint64_t>; |
| 368 | using randen_engine_32 = absl::random_internal::randen_engine<uint32_t>; |
| 369 | |
| 370 | // Comparison interfaces. |
| 371 | BM_BASIC(std::mt19937_64); |
| 372 | BM_COPY(std::mt19937_64); |
| 373 | BM_EXTENDED(std::mt19937_64); |
| 374 | BM_BASIC(randen_engine_64); |
| 375 | BM_COPY(randen_engine_64); |
| 376 | BM_EXTENDED(randen_engine_64); |
| 377 | |
| 378 | BM_BASIC(std::mt19937); |
| 379 | BM_COPY(std::mt19937); |
| 380 | BM_BASIC(randen_engine_32); |
| 381 | BM_COPY(randen_engine_32); |
| 382 | |
| 383 | } // namespace |