Austin Schuh | 36244a1 | 2019-09-21 17:52:38 -0700 | [diff] [blame^] | 1 | // Copyright 2017 The Abseil Authors. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | #include "absl/random/poisson_distribution.h" |
| 16 | |
| 17 | #include <algorithm> |
| 18 | #include <cstddef> |
| 19 | #include <cstdint> |
| 20 | #include <iterator> |
| 21 | #include <random> |
| 22 | #include <sstream> |
| 23 | #include <string> |
| 24 | #include <vector> |
| 25 | |
| 26 | #include "gmock/gmock.h" |
| 27 | #include "gtest/gtest.h" |
| 28 | #include "absl/base/internal/raw_logging.h" |
| 29 | #include "absl/base/macros.h" |
| 30 | #include "absl/container/flat_hash_map.h" |
| 31 | #include "absl/random/internal/chi_square.h" |
| 32 | #include "absl/random/internal/distribution_test_util.h" |
| 33 | #include "absl/random/internal/sequence_urbg.h" |
| 34 | #include "absl/random/random.h" |
| 35 | #include "absl/strings/str_cat.h" |
| 36 | #include "absl/strings/str_format.h" |
| 37 | #include "absl/strings/str_replace.h" |
| 38 | #include "absl/strings/strip.h" |
| 39 | |
| 40 | // Notes about generating poisson variates: |
| 41 | // |
| 42 | // It is unlikely that any implementation of std::poisson_distribution |
| 43 | // will be stable over time and across library implementations. For instance |
| 44 | // the three different poisson variate generators listed below all differ: |
| 45 | // |
| 46 | // https://github.com/ampl/gsl/tree/master/randist/poisson.c |
| 47 | // * GSL uses a gamma + binomial + knuth method to compute poisson variates. |
| 48 | // |
| 49 | // https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/random.tcc |
| 50 | // * GCC uses the Devroye rejection algorithm, based on |
| 51 | // Devroye, L. Non-Uniform Random Variates Generation. Springer-Verlag, |
| 52 | // New York, 1986, Ch. X, Sects. 3.3 & 3.4 (+ Errata!), ~p.511 |
| 53 | // http://www.nrbook.com/devroye/ |
| 54 | // |
| 55 | // https://github.com/llvm-mirror/libcxx/blob/master/include/random |
| 56 | // * CLANG uses a different rejection method, which appears to include a |
| 57 | // normal-distribution approximation and an exponential distribution to |
| 58 | // compute the threshold, including a similar factorial approximation to this |
| 59 | // one, but it is unclear where the algorithm comes from, exactly. |
| 60 | // |
| 61 | |
| 62 | namespace { |
| 63 | |
| 64 | using absl::random_internal::kChiSquared; |
| 65 | |
| 66 | // The PoissonDistributionInterfaceTest provides a basic test that |
| 67 | // absl::poisson_distribution conforms to the interface and serialization |
| 68 | // requirements imposed by [rand.req.dist] for the common integer types. |
| 69 | |
| 70 | template <typename IntType> |
| 71 | class PoissonDistributionInterfaceTest : public ::testing::Test {}; |
| 72 | |
| 73 | using IntTypes = ::testing::Types<int, int8_t, int16_t, int32_t, int64_t, |
| 74 | uint8_t, uint16_t, uint32_t, uint64_t>; |
| 75 | TYPED_TEST_CASE(PoissonDistributionInterfaceTest, IntTypes); |
| 76 | |
| 77 | TYPED_TEST(PoissonDistributionInterfaceTest, SerializeTest) { |
| 78 | using param_type = typename absl::poisson_distribution<TypeParam>::param_type; |
| 79 | const double kMax = |
| 80 | std::min(1e10 /* assertion limit */, |
| 81 | static_cast<double>(std::numeric_limits<TypeParam>::max())); |
| 82 | |
| 83 | const double kParams[] = { |
| 84 | // Cases around 1. |
| 85 | 1, // |
| 86 | std::nextafter(1.0, 0.0), // 1 - epsilon |
| 87 | std::nextafter(1.0, 2.0), // 1 + epsilon |
| 88 | // Arbitrary values. |
| 89 | 1e-8, 1e-4, |
| 90 | 0.0000005, // ~7.2e-7 |
| 91 | 0.2, // ~0.2x |
| 92 | 0.5, // 0.72 |
| 93 | 2, // ~2.8 |
| 94 | 20, // 3x ~9.6 |
| 95 | 100, 1e4, 1e8, 1.5e9, 1e20, |
| 96 | // Boundary cases. |
| 97 | std::numeric_limits<double>::max(), |
| 98 | std::numeric_limits<double>::epsilon(), |
| 99 | std::nextafter(std::numeric_limits<double>::min(), |
| 100 | 1.0), // min + epsilon |
| 101 | std::numeric_limits<double>::min(), // smallest normal |
| 102 | std::numeric_limits<double>::denorm_min(), // smallest denorm |
| 103 | std::numeric_limits<double>::min() / 2, // denorm |
| 104 | std::nextafter(std::numeric_limits<double>::min(), |
| 105 | 0.0), // denorm_max |
| 106 | }; |
| 107 | |
| 108 | |
| 109 | constexpr int kCount = 1000; |
| 110 | absl::InsecureBitGen gen; |
| 111 | for (const double m : kParams) { |
| 112 | const double mean = std::min(kMax, m); |
| 113 | const param_type param(mean); |
| 114 | |
| 115 | // Validate parameters. |
| 116 | absl::poisson_distribution<TypeParam> before(mean); |
| 117 | EXPECT_EQ(before.mean(), param.mean()); |
| 118 | |
| 119 | { |
| 120 | absl::poisson_distribution<TypeParam> via_param(param); |
| 121 | EXPECT_EQ(via_param, before); |
| 122 | EXPECT_EQ(via_param.param(), before.param()); |
| 123 | } |
| 124 | |
| 125 | // Smoke test. |
| 126 | auto sample_min = before.max(); |
| 127 | auto sample_max = before.min(); |
| 128 | for (int i = 0; i < kCount; i++) { |
| 129 | auto sample = before(gen); |
| 130 | EXPECT_GE(sample, before.min()); |
| 131 | EXPECT_LE(sample, before.max()); |
| 132 | if (sample > sample_max) sample_max = sample; |
| 133 | if (sample < sample_min) sample_min = sample; |
| 134 | } |
| 135 | |
| 136 | ABSL_INTERNAL_LOG(INFO, absl::StrCat("Range {", param.mean(), "}: ", |
| 137 | +sample_min, ", ", +sample_max)); |
| 138 | |
| 139 | // Validate stream serialization. |
| 140 | std::stringstream ss; |
| 141 | ss << before; |
| 142 | |
| 143 | absl::poisson_distribution<TypeParam> after(3.8); |
| 144 | |
| 145 | EXPECT_NE(before.mean(), after.mean()); |
| 146 | EXPECT_NE(before.param(), after.param()); |
| 147 | EXPECT_NE(before, after); |
| 148 | |
| 149 | ss >> after; |
| 150 | |
| 151 | EXPECT_EQ(before.mean(), after.mean()) // |
| 152 | << ss.str() << " " // |
| 153 | << (ss.good() ? "good " : "") // |
| 154 | << (ss.bad() ? "bad " : "") // |
| 155 | << (ss.eof() ? "eof " : "") // |
| 156 | << (ss.fail() ? "fail " : ""); |
| 157 | } |
| 158 | } |
| 159 | |
| 160 | // See http://www.itl.nist.gov/div898/handbook/eda/section3/eda366j.htm |
| 161 | |
| 162 | class PoissonModel { |
| 163 | public: |
| 164 | explicit PoissonModel(double mean) : mean_(mean) {} |
| 165 | |
| 166 | double mean() const { return mean_; } |
| 167 | double variance() const { return mean_; } |
| 168 | double stddev() const { return std::sqrt(variance()); } |
| 169 | double skew() const { return 1.0 / mean_; } |
| 170 | double kurtosis() const { return 3.0 + 1.0 / mean_; } |
| 171 | |
| 172 | // InitCDF() initializes the CDF for the distribution parameters. |
| 173 | void InitCDF(); |
| 174 | |
| 175 | // The InverseCDF, or the Percent-point function returns x, P(x) < v. |
| 176 | struct CDF { |
| 177 | size_t index; |
| 178 | double pmf; |
| 179 | double cdf; |
| 180 | }; |
| 181 | CDF InverseCDF(double p) { |
| 182 | CDF target{0, 0, p}; |
| 183 | auto it = std::upper_bound( |
| 184 | std::begin(cdf_), std::end(cdf_), target, |
| 185 | [](const CDF& a, const CDF& b) { return a.cdf < b.cdf; }); |
| 186 | return *it; |
| 187 | } |
| 188 | |
| 189 | void LogCDF() { |
| 190 | ABSL_INTERNAL_LOG(INFO, absl::StrCat("CDF (mean = ", mean_, ")")); |
| 191 | for (const auto c : cdf_) { |
| 192 | ABSL_INTERNAL_LOG(INFO, |
| 193 | absl::StrCat(c.index, ": pmf=", c.pmf, " cdf=", c.cdf)); |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | private: |
| 198 | const double mean_; |
| 199 | |
| 200 | std::vector<CDF> cdf_; |
| 201 | }; |
| 202 | |
| 203 | // The goal is to compute an InverseCDF function, or percent point function for |
| 204 | // the poisson distribution, and use that to partition our output into equal |
| 205 | // range buckets. However there is no closed form solution for the inverse cdf |
| 206 | // for poisson distributions (the closest is the incomplete gamma function). |
| 207 | // Instead, `InitCDF` iteratively computes the PMF and the CDF. This enables |
| 208 | // searching for the bucket points. |
| 209 | void PoissonModel::InitCDF() { |
| 210 | if (!cdf_.empty()) { |
| 211 | // State already initialized. |
| 212 | return; |
| 213 | } |
| 214 | ABSL_ASSERT(mean_ < 201.0); |
| 215 | |
| 216 | const size_t max_i = 50 * stddev() + mean(); |
| 217 | const double e_neg_mean = std::exp(-mean()); |
| 218 | ABSL_ASSERT(e_neg_mean > 0); |
| 219 | |
| 220 | double d = 1; |
| 221 | double last_result = e_neg_mean; |
| 222 | double cumulative = e_neg_mean; |
| 223 | if (e_neg_mean > 1e-10) { |
| 224 | cdf_.push_back({0, e_neg_mean, cumulative}); |
| 225 | } |
| 226 | for (size_t i = 1; i < max_i; i++) { |
| 227 | d *= (mean() / i); |
| 228 | double result = e_neg_mean * d; |
| 229 | cumulative += result; |
| 230 | if (result < 1e-10 && result < last_result && cumulative > 0.999999) { |
| 231 | break; |
| 232 | } |
| 233 | if (result > 1e-7) { |
| 234 | cdf_.push_back({i, result, cumulative}); |
| 235 | } |
| 236 | last_result = result; |
| 237 | } |
| 238 | ABSL_ASSERT(!cdf_.empty()); |
| 239 | } |
| 240 | |
// ZParam is the parameter set for one PoissonDistributionZTest instance, which
// runs a z-test against the poisson distribution.
struct ZParam {
  double mean;     // Mean of the distribution under test.
  double p_fail;   // Probability of failure for the z-test.
  int trials;      // Number of z-test trials to run.
  size_t samples;  // Number of samples drawn per trial.
};
| 249 | |
| 250 | class PoissonDistributionZTest : public testing::TestWithParam<ZParam>, |
| 251 | public PoissonModel { |
| 252 | public: |
| 253 | PoissonDistributionZTest() : PoissonModel(GetParam().mean) {} |
| 254 | |
| 255 | // ZTestImpl provides a basic z-squared test of the mean vs. expected |
| 256 | // mean for data generated by the poisson distribution. |
| 257 | template <typename D> |
| 258 | bool SingleZTest(const double p, const size_t samples); |
| 259 | |
| 260 | absl::InsecureBitGen rng_; |
| 261 | }; |
| 262 | |
| 263 | template <typename D> |
| 264 | bool PoissonDistributionZTest::SingleZTest(const double p, |
| 265 | const size_t samples) { |
| 266 | D dis(mean()); |
| 267 | |
| 268 | absl::flat_hash_map<int32_t, int> buckets; |
| 269 | std::vector<double> data; |
| 270 | data.reserve(samples); |
| 271 | for (int j = 0; j < samples; j++) { |
| 272 | const auto x = dis(rng_); |
| 273 | buckets[x]++; |
| 274 | data.push_back(x); |
| 275 | } |
| 276 | |
| 277 | // The null-hypothesis is that the distribution is a poisson distribution with |
| 278 | // the provided mean (not estimated from the data). |
| 279 | const auto m = absl::random_internal::ComputeDistributionMoments(data); |
| 280 | const double max_err = absl::random_internal::MaxErrorTolerance(p); |
| 281 | const double z = absl::random_internal::ZScore(mean(), m); |
| 282 | const bool pass = absl::random_internal::Near("z", z, 0.0, max_err); |
| 283 | |
| 284 | if (!pass) { |
| 285 | ABSL_INTERNAL_LOG( |
| 286 | INFO, absl::StrFormat("p=%f max_err=%f\n" |
| 287 | " mean=%f vs. %f\n" |
| 288 | " stddev=%f vs. %f\n" |
| 289 | " skewness=%f vs. %f\n" |
| 290 | " kurtosis=%f vs. %f\n" |
| 291 | " z=%f", |
| 292 | p, max_err, m.mean, mean(), std::sqrt(m.variance), |
| 293 | stddev(), m.skewness, skew(), m.kurtosis, |
| 294 | kurtosis(), z)); |
| 295 | } |
| 296 | return pass; |
| 297 | } |
| 298 | |
| 299 | TEST_P(PoissonDistributionZTest, AbslPoissonDistribution) { |
| 300 | const auto& param = GetParam(); |
| 301 | const int expected_failures = |
| 302 | std::max(1, static_cast<int>(std::ceil(param.trials * param.p_fail))); |
| 303 | const double p = absl::random_internal::RequiredSuccessProbability( |
| 304 | param.p_fail, param.trials); |
| 305 | |
| 306 | int failures = 0; |
| 307 | for (int i = 0; i < param.trials; i++) { |
| 308 | failures += |
| 309 | SingleZTest<absl::poisson_distribution<int32_t>>(p, param.samples) ? 0 |
| 310 | : 1; |
| 311 | } |
| 312 | EXPECT_LE(failures, expected_failures); |
| 313 | } |
| 314 | |
| 315 | std::vector<ZParam> GetZParams() { |
| 316 | // These values have been adjusted from the "exact" computed values to reduce |
| 317 | // failure rates. |
| 318 | // |
| 319 | // It turns out that the actual values are not as close to the expected values |
| 320 | // as would be ideal. |
| 321 | return std::vector<ZParam>({ |
| 322 | // Knuth method. |
| 323 | ZParam{0.5, 0.01, 100, 1000}, |
| 324 | ZParam{1.0, 0.01, 100, 1000}, |
| 325 | ZParam{10.0, 0.01, 100, 5000}, |
| 326 | // Split-knuth method. |
| 327 | ZParam{20.0, 0.01, 100, 10000}, |
| 328 | ZParam{50.0, 0.01, 100, 10000}, |
| 329 | // Ratio of gaussians method. |
| 330 | ZParam{51.0, 0.01, 100, 10000}, |
| 331 | ZParam{200.0, 0.05, 10, 100000}, |
| 332 | ZParam{100000.0, 0.05, 10, 1000000}, |
| 333 | }); |
| 334 | } |
| 335 | |
| 336 | std::string ZParamName(const ::testing::TestParamInfo<ZParam>& info) { |
| 337 | const auto& p = info.param; |
| 338 | std::string name = absl::StrCat("mean_", absl::SixDigits(p.mean)); |
| 339 | return absl::StrReplaceAll(name, {{"+", "_"}, {"-", "_"}, {".", "_"}}); |
| 340 | } |
| 341 | |
| 342 | INSTANTIATE_TEST_SUITE_P(All, PoissonDistributionZTest, |
| 343 | ::testing::ValuesIn(GetZParams()), ZParamName); |
| 344 | |
| 345 | // The PoissonDistributionChiSquaredTest class provides a basic test framework |
| 346 | // for variates generated by a conforming poisson_distribution. |
| 347 | class PoissonDistributionChiSquaredTest : public testing::TestWithParam<double>, |
| 348 | public PoissonModel { |
| 349 | public: |
| 350 | PoissonDistributionChiSquaredTest() : PoissonModel(GetParam()) {} |
| 351 | |
| 352 | // The ChiSquaredTestImpl provides a chi-squared goodness of fit test for data |
| 353 | // generated by the poisson distribution. |
| 354 | template <typename D> |
| 355 | double ChiSquaredTestImpl(); |
| 356 | |
| 357 | private: |
| 358 | void InitChiSquaredTest(const double buckets); |
| 359 | |
| 360 | absl::InsecureBitGen rng_; |
| 361 | std::vector<size_t> cutoffs_; |
| 362 | std::vector<double> expected_; |
| 363 | }; |
| 364 | |
| 365 | void PoissonDistributionChiSquaredTest::InitChiSquaredTest( |
| 366 | const double buckets) { |
| 367 | if (!cutoffs_.empty() && !expected_.empty()) { |
| 368 | return; |
| 369 | } |
| 370 | InitCDF(); |
| 371 | |
| 372 | // The code below finds cuttoffs that yield approximately equally-sized |
| 373 | // buckets to the extent that it is possible. However for poisson |
| 374 | // distributions this is particularly challenging for small mean parameters. |
| 375 | // Track the expected proportion of items in each bucket. |
| 376 | double last_cdf = 0; |
| 377 | const double inc = 1.0 / buckets; |
| 378 | for (double p = inc; p <= 1.0; p += inc) { |
| 379 | auto result = InverseCDF(p); |
| 380 | if (!cutoffs_.empty() && cutoffs_.back() == result.index) { |
| 381 | continue; |
| 382 | } |
| 383 | double d = result.cdf - last_cdf; |
| 384 | cutoffs_.push_back(result.index); |
| 385 | expected_.push_back(d); |
| 386 | last_cdf = result.cdf; |
| 387 | } |
| 388 | cutoffs_.push_back(std::numeric_limits<size_t>::max()); |
| 389 | expected_.push_back(std::max(0.0, 1.0 - last_cdf)); |
| 390 | } |
| 391 | |
| 392 | template <typename D> |
| 393 | double PoissonDistributionChiSquaredTest::ChiSquaredTestImpl() { |
| 394 | const int kSamples = 2000; |
| 395 | const int kBuckets = 50; |
| 396 | |
| 397 | // The poisson CDF fails for large mean values, since e^-mean exceeds the |
| 398 | // machine precision. For these cases, using a normal approximation would be |
| 399 | // appropriate. |
| 400 | ABSL_ASSERT(mean() <= 200); |
| 401 | InitChiSquaredTest(kBuckets); |
| 402 | |
| 403 | D dis(mean()); |
| 404 | |
| 405 | std::vector<int32_t> counts(cutoffs_.size(), 0); |
| 406 | for (int j = 0; j < kSamples; j++) { |
| 407 | const size_t x = dis(rng_); |
| 408 | auto it = std::lower_bound(std::begin(cutoffs_), std::end(cutoffs_), x); |
| 409 | counts[std::distance(cutoffs_.begin(), it)]++; |
| 410 | } |
| 411 | |
| 412 | // Normalize the counts. |
| 413 | std::vector<int32_t> e(expected_.size(), 0); |
| 414 | for (int i = 0; i < e.size(); i++) { |
| 415 | e[i] = kSamples * expected_[i]; |
| 416 | } |
| 417 | |
| 418 | // The null-hypothesis is that the distribution is a poisson distribution with |
| 419 | // the provided mean (not estimated from the data). |
| 420 | const int dof = static_cast<int>(counts.size()) - 1; |
| 421 | |
| 422 | // The threshold for logging is 1-in-50. |
| 423 | const double threshold = absl::random_internal::ChiSquareValue(dof, 0.98); |
| 424 | |
| 425 | const double chi_square = absl::random_internal::ChiSquare( |
| 426 | std::begin(counts), std::end(counts), std::begin(e), std::end(e)); |
| 427 | |
| 428 | const double p = absl::random_internal::ChiSquarePValue(chi_square, dof); |
| 429 | |
| 430 | // Log if the chi_squared value is above the threshold. |
| 431 | if (chi_square > threshold) { |
| 432 | LogCDF(); |
| 433 | |
| 434 | ABSL_INTERNAL_LOG(INFO, absl::StrCat("VALUES buckets=", counts.size(), |
| 435 | " samples=", kSamples)); |
| 436 | for (size_t i = 0; i < counts.size(); i++) { |
| 437 | ABSL_INTERNAL_LOG( |
| 438 | INFO, absl::StrCat(cutoffs_[i], ": ", counts[i], " vs. E=", e[i])); |
| 439 | } |
| 440 | |
| 441 | ABSL_INTERNAL_LOG( |
| 442 | INFO, |
| 443 | absl::StrCat(kChiSquared, "(data, dof=", dof, ") = ", chi_square, " (", |
| 444 | p, ")\n", " vs.\n", kChiSquared, " @ 0.98 = ", threshold)); |
| 445 | } |
| 446 | return p; |
| 447 | } |
| 448 | |
| 449 | TEST_P(PoissonDistributionChiSquaredTest, AbslPoissonDistribution) { |
| 450 | const int kTrials = 20; |
| 451 | |
| 452 | // Large values are not yet supported -- this requires estimating the cdf |
| 453 | // using the normal distribution instead of the poisson in this case. |
| 454 | ASSERT_LE(mean(), 200.0); |
| 455 | if (mean() > 200.0) { |
| 456 | return; |
| 457 | } |
| 458 | |
| 459 | int failures = 0; |
| 460 | for (int i = 0; i < kTrials; i++) { |
| 461 | double p_value = ChiSquaredTestImpl<absl::poisson_distribution<int32_t>>(); |
| 462 | if (p_value < 0.005) { |
| 463 | failures++; |
| 464 | } |
| 465 | } |
| 466 | // There is a 0.10% chance of producing at least one failure, so raise the |
| 467 | // failure threshold high enough to allow for a flake rate < 10,000. |
| 468 | EXPECT_LE(failures, 4); |
| 469 | } |
| 470 | |
| 471 | INSTANTIATE_TEST_SUITE_P(All, PoissonDistributionChiSquaredTest, |
| 472 | ::testing::Values(0.5, 1.0, 2.0, 10.0, 50.0, 51.0, |
| 473 | 200.0)); |
| 474 | |
| 475 | // NOTE: absl::poisson_distribution is not guaranteed to be stable. |
| 476 | TEST(PoissonDistributionTest, StabilityTest) { |
| 477 | using testing::ElementsAre; |
| 478 | // absl::poisson_distribution stability relies on stability of |
| 479 | // std::exp, std::log, std::sqrt, std::ceil, std::floor, and |
| 480 | // absl::FastUniformBits, absl::StirlingLogFactorial, absl::RandU64ToDouble. |
| 481 | absl::random_internal::sequence_urbg urbg({ |
| 482 | 0x035b0dc7e0a18acfull, 0x06cebe0d2653682eull, 0x0061e9b23861596bull, |
| 483 | 0x0003eb76f6f7f755ull, 0xFFCEA50FDB2F953Bull, 0xC332DDEFBE6C5AA5ull, |
| 484 | 0x6558218568AB9702ull, 0x2AEF7DAD5B6E2F84ull, 0x1521B62829076170ull, |
| 485 | 0xECDD4775619F1510ull, 0x13CCA830EB61BD96ull, 0x0334FE1EAA0363CFull, |
| 486 | 0xB5735C904C70A239ull, 0xD59E9E0BCBAADE14ull, 0xEECC86BC60622CA7ull, |
| 487 | 0x4864f22c059bf29eull, 0x247856d8b862665cull, 0xe46e86e9a1337e10ull, |
| 488 | 0xd8c8541f3519b133ull, 0xe75b5162c567b9e4ull, 0xf732e5ded7009c5bull, |
| 489 | 0xb170b98353121eacull, 0x1ec2e8986d2362caull, 0x814c8e35fe9a961aull, |
| 490 | 0x0c3cd59c9b638a02ull, 0xcb3bb6478a07715cull, 0x1224e62c978bbc7full, |
| 491 | 0x671ef2cb04e81f6eull, 0x3c1cbd811eaf1808ull, 0x1bbc23cfa8fac721ull, |
| 492 | 0xa4c2cda65e596a51ull, 0xb77216fad37adf91ull, 0x836d794457c08849ull, |
| 493 | 0xe083df03475f49d7ull, 0xbc9feb512e6b0d6cull, 0xb12d74fdd718c8c5ull, |
| 494 | 0x12ff09653bfbe4caull, 0x8dd03a105bc4ee7eull, 0x5738341045ba0d85ull, |
| 495 | 0xf3fd722dc65ad09eull, 0xfa14fd21ea2a5705ull, 0xffe6ea4d6edb0c73ull, |
| 496 | 0xD07E9EFE2BF11FB4ull, 0x95DBDA4DAE909198ull, 0xEAAD8E716B93D5A0ull, |
| 497 | 0xD08ED1D0AFC725E0ull, 0x8E3C5B2F8E7594B7ull, 0x8FF6E2FBF2122B64ull, |
| 498 | 0x8888B812900DF01Cull, 0x4FAD5EA0688FC31Cull, 0xD1CFF191B3A8C1ADull, |
| 499 | 0x2F2F2218BE0E1777ull, 0xEA752DFE8B021FA1ull, 0xE5A0CC0FB56F74E8ull, |
| 500 | 0x18ACF3D6CE89E299ull, 0xB4A84FE0FD13E0B7ull, 0x7CC43B81D2ADA8D9ull, |
| 501 | 0x165FA26680957705ull, 0x93CC7314211A1477ull, 0xE6AD206577B5FA86ull, |
| 502 | 0xC75442F5FB9D35CFull, 0xEBCDAF0C7B3E89A0ull, 0xD6411BD3AE1E7E49ull, |
| 503 | 0x00250E2D2071B35Eull, 0x226800BB57B8E0AFull, 0x2464369BF009B91Eull, |
| 504 | 0x5563911D59DFA6AAull, 0x78C14389D95A537Full, 0x207D5BA202E5B9C5ull, |
| 505 | 0x832603766295CFA9ull, 0x11C819684E734A41ull, 0xB3472DCA7B14A94Aull, |
| 506 | }); |
| 507 | |
| 508 | std::vector<int> output(10); |
| 509 | |
| 510 | // Method 1. |
| 511 | { |
| 512 | absl::poisson_distribution<int> dist(5); |
| 513 | std::generate(std::begin(output), std::end(output), |
| 514 | [&] { return dist(urbg); }); |
| 515 | } |
| 516 | EXPECT_THAT(output, // mean = 4.2 |
| 517 | ElementsAre(1, 0, 0, 4, 2, 10, 3, 3, 7, 12)); |
| 518 | |
| 519 | // Method 2. |
| 520 | { |
| 521 | urbg.reset(); |
| 522 | absl::poisson_distribution<int> dist(25); |
| 523 | std::generate(std::begin(output), std::end(output), |
| 524 | [&] { return dist(urbg); }); |
| 525 | } |
| 526 | EXPECT_THAT(output, // mean = 19.8 |
| 527 | ElementsAre(9, 35, 18, 10, 35, 18, 10, 35, 18, 10)); |
| 528 | |
| 529 | // Method 3. |
| 530 | { |
| 531 | urbg.reset(); |
| 532 | absl::poisson_distribution<int> dist(121); |
| 533 | std::generate(std::begin(output), std::end(output), |
| 534 | [&] { return dist(urbg); }); |
| 535 | } |
| 536 | EXPECT_THAT(output, // mean = 124.1 |
| 537 | ElementsAre(161, 122, 129, 124, 112, 112, 117, 120, 130, 114)); |
| 538 | } |
| 539 | |
| 540 | TEST(PoissonDistributionTest, AlgorithmExpectedValue_1) { |
| 541 | // This tests small values of the Knuth method. |
| 542 | // The underlying uniform distribution will generate exactly 0.5. |
| 543 | absl::random_internal::sequence_urbg urbg({0x8000000000000001ull}); |
| 544 | absl::poisson_distribution<int> dist(5); |
| 545 | EXPECT_EQ(7, dist(urbg)); |
| 546 | } |
| 547 | |
| 548 | TEST(PoissonDistributionTest, AlgorithmExpectedValue_2) { |
| 549 | // This tests larger values of the Knuth method. |
| 550 | // The underlying uniform distribution will generate exactly 0.5. |
| 551 | absl::random_internal::sequence_urbg urbg({0x8000000000000001ull}); |
| 552 | absl::poisson_distribution<int> dist(25); |
| 553 | EXPECT_EQ(36, dist(urbg)); |
| 554 | } |
| 555 | |
| 556 | TEST(PoissonDistributionTest, AlgorithmExpectedValue_3) { |
| 557 | // This variant uses the ratio of uniforms method. |
| 558 | absl::random_internal::sequence_urbg urbg( |
| 559 | {0x7fffffffffffffffull, 0x8000000000000000ull}); |
| 560 | |
| 561 | absl::poisson_distribution<int> dist(121); |
| 562 | EXPECT_EQ(121, dist(urbg)); |
| 563 | } |
| 564 | |
| 565 | } // namespace |