Blame - absl/random/discrete_distribution_test.cc - RealtimeRoboticsGroup/test

blob: 7296f0ac226aba76c19d4766c71f061a3f0fae2b [file] [log] [blame]

Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame^]	1	// Copyright 2017 The Abseil Authors.
				2	//
				3	// Licensed under the Apache License, Version 2.0 (the "License");
				4	// you may not use this file except in compliance with the License.
				5	// You may obtain a copy of the License at
				6	//
				7	// https://www.apache.org/licenses/LICENSE-2.0
				8	//
				9	// Unless required by applicable law or agreed to in writing, software
				10	// distributed under the License is distributed on an "AS IS" BASIS,
				11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	// See the License for the specific language governing permissions and
				13	// limitations under the License.
				14
				15	#include "absl/random/discrete_distribution.h"
				16
				17	#include <cmath>
				18	#include <cstddef>
				19	#include <cstdint>
				20	#include <iterator>
				21	#include <numeric>
				22	#include <random>
				23	#include <sstream>
				24	#include <string>
				25	#include <vector>
				26
				27	#include "gmock/gmock.h"
				28	#include "gtest/gtest.h"
				29	#include "absl/base/internal/raw_logging.h"
				30	#include "absl/random/internal/chi_square.h"
				31	#include "absl/random/internal/distribution_test_util.h"
				32	#include "absl/random/internal/sequence_urbg.h"
				33	#include "absl/random/random.h"
				34	#include "absl/strings/str_cat.h"
				35	#include "absl/strings/strip.h"
				36
				37	namespace {
				38
				39	template <typename IntType>
				40	class DiscreteDistributionTypeTest : public ::testing::Test {};
				41
				42	using IntTypes = ::testing::Types<int8_t, uint8_t, int16_t, uint16_t, int32_t,
				43	uint32_t, int64_t, uint64_t>;
				44	TYPED_TEST_SUITE(DiscreteDistributionTypeTest, IntTypes);
				45
				46	TYPED_TEST(DiscreteDistributionTypeTest, ParamSerializeTest) {
				47	using param_type =
				48	typename absl::discrete_distribution<TypeParam>::param_type;
				49
				50	absl::discrete_distribution<TypeParam> empty;
				51	EXPECT_THAT(empty.probabilities(), testing::ElementsAre(1.0));
				52
				53	absl::discrete_distribution<TypeParam> before({1.0, 2.0, 1.0});
				54
				55	// Validate that the probabilities sum to 1.0. We picked values which
				56	// can be represented exactly to avoid floating-point roundoff error.
				57	double s = 0;
				58	for (const auto& x : before.probabilities()) {
				59	s += x;
				60	}
				61	EXPECT_EQ(s, 1.0);
				62	EXPECT_THAT(before.probabilities(), testing::ElementsAre(0.25, 0.5, 0.25));
				63
				64	// Validate the same data via an initializer list.
				65	{
				66	std::vector<double> data({1.0, 2.0, 1.0});
				67
				68	absl::discrete_distribution<TypeParam> via_param{
				69	param_type(std::begin(data), std::end(data))};
				70
				71	EXPECT_EQ(via_param, before);
				72	}
				73
				74	std::stringstream ss;
				75	ss << before;
				76	absl::discrete_distribution<TypeParam> after;
				77
				78	EXPECT_NE(before, after);
				79
				80	ss >> after;
				81
				82	EXPECT_EQ(before, after);
				83	}
				84
				85	TYPED_TEST(DiscreteDistributionTypeTest, Constructor) {
				86	auto fn = [](double x) { return x; };
				87	{
				88	absl::discrete_distribution<int> unary(0, 1.0, 9.0, fn);
				89	EXPECT_THAT(unary.probabilities(), testing::ElementsAre(1.0));
				90	}
				91
				92	{
				93	absl::discrete_distribution<int> unary(2, 1.0, 9.0, fn);
				94	// => fn(1.0 + 0 * 4 + 2) => 3
				95	// => fn(1.0 + 1 * 4 + 2) => 7
				96	EXPECT_THAT(unary.probabilities(), testing::ElementsAre(0.3, 0.7));
				97	}
				98	}
				99
				100	TEST(DiscreteDistributionTest, InitDiscreteDistribution) {
				101	using testing::Pair;
				102
				103	{
				104	std::vector<double> p({1.0, 2.0, 3.0});
				105	std::vector<std::pair<double, size_t>> q =
				106	absl::random_internal::InitDiscreteDistribution(&p);
				107
				108	EXPECT_THAT(p, testing::ElementsAre(1 / 6.0, 2 / 6.0, 3 / 6.0));
				109
				110	// Each bucket is p=1/3, so bucket 0 will send half it's traffic
				111	// to bucket 2, while the rest will retain all of their traffic.
				112	EXPECT_THAT(q, testing::ElementsAre(Pair(0.5, 2), //
				113	Pair(1.0, 1), //
				114	Pair(1.0, 2)));
				115	}
				116
				117	{
				118	std::vector<double> p({1.0, 2.0, 3.0, 5.0, 2.0});
				119
				120	std::vector<std::pair<double, size_t>> q =
				121	absl::random_internal::InitDiscreteDistribution(&p);
				122
				123	EXPECT_THAT(p, testing::ElementsAre(1 / 13.0, 2 / 13.0, 3 / 13.0, 5 / 13.0,
				124	2 / 13.0));
				125
				126	// A more complex bucketing solution: Each bucket has p=0.2
				127	// So buckets 0, 1, 4 will send their alternate traffic elsewhere, which
				128	// happens to be bucket 3.
				129	// However, summing up that alternate traffic gives bucket 3 too much
				130	// traffic, so it will send some traffic to bucket 2.
				131	constexpr double b0 = 1.0 / 13.0 / 0.2;
				132	constexpr double b1 = 2.0 / 13.0 / 0.2;
				133	constexpr double b3 = (5.0 / 13.0 / 0.2) - ((1 - b0) + (1 - b1) + (1 - b1));
				134
				135	EXPECT_THAT(q, testing::ElementsAre(Pair(b0, 3), //
				136	Pair(b1, 3), //
				137	Pair(1.0, 2), //
				138	Pair(b3, 2), //
				139	Pair(b1, 3)));
				140	}
				141	}
				142
				143	TEST(DiscreteDistributionTest, ChiSquaredTest50) {
				144	using absl::random_internal::kChiSquared;
				145
				146	constexpr size_t kTrials = 10000;
				147	constexpr int kBuckets = 50; // inclusive, so actally +1
				148
				149	// 1-in-100000 threshold, but remember, there are about 8 tests
				150	// in this file. And the test could fail for other reasons.
				151	// Empirically validated with --runs_per_test=10000.
				152	const int kThreshold =
				153	absl::random_internal::ChiSquareValue(kBuckets, 0.99999);
				154
				155	std::vector<double> weights(kBuckets, 0);
				156	std::iota(std::begin(weights), std::end(weights), 1);
				157	absl::discrete_distribution<int> dist(std::begin(weights), std::end(weights));
				158
				159	absl::InsecureBitGen rng;
				160
				161	std::vector<int32_t> counts(kBuckets, 0);
				162	for (size_t i = 0; i < kTrials; i++) {
				163	auto x = dist(rng);
				164	counts[x]++;
				165	}
				166
				167	// Scale weights.
				168	double sum = 0;
				169	for (double x : weights) {
				170	sum += x;
				171	}
				172	for (double& x : weights) {
				173	x = kTrials * (x / sum);
				174	}
				175
				176	double chi_square =
				177	absl::random_internal::ChiSquare(std::begin(counts), std::end(counts),
				178	std::begin(weights), std::end(weights));
				179
				180	if (chi_square > kThreshold) {
				181	double p_value =
				182	absl::random_internal::ChiSquarePValue(chi_square, kBuckets);
				183
				184	// Chi-squared test failed. Output does not appear to be uniform.
				185	std::string msg;
				186	for (size_t i = 0; i < counts.size(); i++) {
				187	absl::StrAppend(&msg, i, ": ", counts[i], " vs ", weights[i], "\n");
				188	}
				189	absl::StrAppend(&msg, kChiSquared, " p-value ", p_value, "\n");
				190	absl::StrAppend(&msg, "High ", kChiSquared, " value: ", chi_square, " > ",
				191	kThreshold);
				192	ABSL_RAW_LOG(INFO, "%s", msg.c_str());
				193	FAIL() << msg;
				194	}
				195	}
				196
				197	TEST(DiscreteDistributionTest, StabilityTest) {
				198	// absl::discrete_distribution stabilitiy relies on
				199	// absl::uniform_int_distribution and absl::bernoulli_distribution.
				200	absl::random_internal::sequence_urbg urbg(
				201	{0x0003eb76f6f7f755ull, 0xFFCEA50FDB2F953Bull, 0xC332DDEFBE6C5AA5ull,
				202	0x6558218568AB9702ull, 0x2AEF7DAD5B6E2F84ull, 0x1521B62829076170ull,
				203	0xECDD4775619F1510ull, 0x13CCA830EB61BD96ull, 0x0334FE1EAA0363CFull,
				204	0xB5735C904C70A239ull, 0xD59E9E0BCBAADE14ull, 0xEECC86BC60622CA7ull});
				205
				206	std::vector<int> output(6);
				207
				208	{
				209	absl::discrete_distribution<int32_t> dist({1.0, 2.0, 3.0, 5.0, 2.0});
				210	EXPECT_EQ(0, dist.min());
				211	EXPECT_EQ(4, dist.max());
				212	for (auto& v : output) {
				213	v = dist(urbg);
				214	}
				215	EXPECT_EQ(12, urbg.invocations());
				216	}
				217
				218	// With 12 calls to urbg, each call into discrete_distribution consumes
				219	// precisely 2 values: one for the uniform call, and a second for the
				220	// bernoulli.
				221	//
				222	// Given the alt mapping: 0=>3, 1=>3, 2=>2, 3=>2, 4=>3, we can
				223	//
				224	// uniform: 443210143131
				225	// bernoulli: b0 000011100101
				226	// bernoulli: b1 001111101101
				227	// bernoulli: b2 111111111111
				228	// bernoulli: b3 001111101111
				229	// bernoulli: b4 001111101101
				230	// ...
				231	EXPECT_THAT(output, testing::ElementsAre(3, 3, 1, 3, 3, 3));
				232
				233	{
				234	urbg.reset();
				235	absl::discrete_distribution<int64_t> dist({1.0, 2.0, 3.0, 5.0, 2.0});
				236	EXPECT_EQ(0, dist.min());
				237	EXPECT_EQ(4, dist.max());
				238	for (auto& v : output) {
				239	v = dist(urbg);
				240	}
				241	EXPECT_EQ(12, urbg.invocations());
				242	}
				243	EXPECT_THAT(output, testing::ElementsAre(3, 3, 0, 3, 0, 4));
				244	}
				245
				246	} // namespace