Blame - absl/synchronization/mutex_benchmark.cc - RealtimeRoboticsGroup/test

blob: ab1880012a0c1e791478b047b65cc0a90d36b7d7 [file] [log] [blame]

Austin Schuh	36244a1	2019-09-21 17:52:38 -0700	[diff] [blame^]	1	// Copyright 2017 The Abseil Authors.
				2	//
				3	// Licensed under the Apache License, Version 2.0 (the "License");
				4	// you may not use this file except in compliance with the License.
				5	// You may obtain a copy of the License at
				6	//
				7	// https://www.apache.org/licenses/LICENSE-2.0
				8	//
				9	// Unless required by applicable law or agreed to in writing, software
				10	// distributed under the License is distributed on an "AS IS" BASIS,
				11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	// See the License for the specific language governing permissions and
				13	// limitations under the License.
				14
				15	#include <cstdint>
				16	#include <mutex> // NOLINT(build/c++11)
				17	#include <vector>
				18
				19	#include "absl/base/internal/cycleclock.h"
				20	#include "absl/base/internal/spinlock.h"
				21	#include "absl/synchronization/blocking_counter.h"
				22	#include "absl/synchronization/internal/thread_pool.h"
				23	#include "absl/synchronization/mutex.h"
				24	#include "benchmark/benchmark.h"
				25
				26	namespace {
				27
				28	void BM_Mutex(benchmark::State& state) {
				29	static absl::Mutex* mu = new absl::Mutex;
				30	for (auto _ : state) {
				31	absl::MutexLock lock(mu);
				32	}
				33	}
				34	BENCHMARK(BM_Mutex)->UseRealTime()->Threads(1)->ThreadPerCpu();
				35
				36	static void DelayNs(int64_t ns, int* data) {
				37	int64_t end = absl::base_internal::CycleClock::Now() +
				38	ns * absl::base_internal::CycleClock::Frequency() / 1e9;
				39	while (absl::base_internal::CycleClock::Now() < end) {
				40	++(*data);
				41	benchmark::DoNotOptimize(*data);
				42	}
				43	}
				44
				45	template <typename MutexType>
				46	class RaiiLocker {
				47	public:
				48	explicit RaiiLocker(MutexType* mu) : mu_(mu) { mu_->Lock(); }
				49	~RaiiLocker() { mu_->Unlock(); }
				50	private:
				51	MutexType* mu_;
				52	};
				53
				54	template <>
				55	class RaiiLocker<std::mutex> {
				56	public:
				57	explicit RaiiLocker(std::mutex* mu) : mu_(mu) { mu_->lock(); }
				58	~RaiiLocker() { mu_->unlock(); }
				59	private:
				60	std::mutex* mu_;
				61	};
				62
				63	template <typename MutexType>
				64	void BM_Contended(benchmark::State& state) {
				65	struct Shared {
				66	MutexType mu;
				67	int data = 0;
				68	};
				69	static auto* shared = new Shared;
				70	int local = 0;
				71	for (auto _ : state) {
				72	// Here we model both local work outside of the critical section as well as
				73	// some work inside of the critical section. The idea is to capture some
				74	// more or less realisitic contention levels.
				75	// If contention is too low, the benchmark won't measure anything useful.
				76	// If contention is unrealistically high, the benchmark will favor
				77	// bad mutex implementations that block and otherwise distract threads
				78	// from the mutex and shared state for as much as possible.
				79	// To achieve this amount of local work is multiplied by number of threads
				80	// to keep ratio between local work and critical section approximately
				81	// equal regardless of number of threads.
				82	DelayNs(100 * state.threads, &local);
				83	RaiiLocker<MutexType> locker(&shared->mu);
				84	DelayNs(state.range(0), &shared->data);
				85	}
				86	}
				87
				88	BENCHMARK_TEMPLATE(BM_Contended, absl::Mutex)
				89	->UseRealTime()
				90	// ThreadPerCpu poorly handles non-power-of-two CPU counts.
				91	->Threads(1)
				92	->Threads(2)
				93	->Threads(4)
				94	->Threads(6)
				95	->Threads(8)
				96	->Threads(12)
				97	->Threads(16)
				98	->Threads(24)
				99	->Threads(32)
				100	->Threads(48)
				101	->Threads(64)
				102	->Threads(96)
				103	->Threads(128)
				104	->Threads(192)
				105	->Threads(256)
				106	// Some empirically chosen amounts of work in critical section.
				107	// 1 is low contention, 200 is high contention and few values in between.
				108	->Arg(1)
				109	->Arg(20)
				110	->Arg(50)
				111	->Arg(200);
				112
				113	BENCHMARK_TEMPLATE(BM_Contended, absl::base_internal::SpinLock)
				114	->UseRealTime()
				115	// ThreadPerCpu poorly handles non-power-of-two CPU counts.
				116	->Threads(1)
				117	->Threads(2)
				118	->Threads(4)
				119	->Threads(6)
				120	->Threads(8)
				121	->Threads(12)
				122	->Threads(16)
				123	->Threads(24)
				124	->Threads(32)
				125	->Threads(48)
				126	->Threads(64)
				127	->Threads(96)
				128	->Threads(128)
				129	->Threads(192)
				130	->Threads(256)
				131	// Some empirically chosen amounts of work in critical section.
				132	// 1 is low contention, 200 is high contention and few values in between.
				133	->Arg(1)
				134	->Arg(20)
				135	->Arg(50)
				136	->Arg(200);
				137
				138	BENCHMARK_TEMPLATE(BM_Contended, std::mutex)
				139	->UseRealTime()
				140	// ThreadPerCpu poorly handles non-power-of-two CPU counts.
				141	->Threads(1)
				142	->Threads(2)
				143	->Threads(4)
				144	->Threads(6)
				145	->Threads(8)
				146	->Threads(12)
				147	->Threads(16)
				148	->Threads(24)
				149	->Threads(32)
				150	->Threads(48)
				151	->Threads(64)
				152	->Threads(96)
				153	->Threads(128)
				154	->Threads(192)
				155	->Threads(256)
				156	// Some empirically chosen amounts of work in critical section.
				157	// 1 is low contention, 200 is high contention and few values in between.
				158	->Arg(1)
				159	->Arg(20)
				160	->Arg(50)
				161	->Arg(200);
				162
				163	// Measure the overhead of conditions on mutex release (when they must be
				164	// evaluated). Mutex has (some) support for equivalence classes allowing
				165	// Conditions with the same function/argument to potentially not be multiply
				166	// evaluated.
				167	//
				168	// num_classes==0 is used for the special case of every waiter being distinct.
				169	void BM_ConditionWaiters(benchmark::State& state) {
				170	int num_classes = state.range(0);
				171	int num_waiters = state.range(1);
				172
				173	struct Helper {
				174	static void Waiter(absl::BlockingCounter* init, absl::Mutex* m, int* p) {
				175	init->DecrementCount();
				176	m->LockWhen(absl::Condition(
				177	static_cast<bool ()(int)>([](int* v) { return *v == 0; }), p));
				178	m->Unlock();
				179	}
				180	};
				181
				182	if (num_classes == 0) {
				183	// No equivalence classes.
				184	num_classes = num_waiters;
				185	}
				186
				187	absl::BlockingCounter init(num_waiters);
				188	absl::Mutex mu;
				189	std::vector<int> equivalence_classes(num_classes, 1);
				190
				191	// Must be declared last to be destroyed first.
				192	absl::synchronization_internal::ThreadPool pool(num_waiters);
				193
				194	for (int i = 0; i < num_waiters; i++) {
				195	// Mutex considers Conditions with the same function and argument
				196	// to be equivalent.
				197	pool.Schedule([&, i] {
				198	Helper::Waiter(&init, &mu, &equivalence_classes[i % num_classes]);
				199	});
				200	}
				201	init.Wait();
				202
				203	for (auto _ : state) {
				204	mu.Lock();
				205	mu.Unlock(); // Each unlock requires Condition evaluation for our waiters.
				206	}
				207
				208	mu.Lock();
				209	for (int i = 0; i < num_classes; i++) {
				210	equivalence_classes[i] = 0;
				211	}
				212	mu.Unlock();
				213	}
				214
				215	// Some configurations have higher thread limits than others.
				216	#if defined(__linux__) && !defined(THREAD_SANITIZER)
				217	constexpr int kMaxConditionWaiters = 8192;
				218	#else
				219	constexpr int kMaxConditionWaiters = 1024;
				220	#endif
				221	BENCHMARK(BM_ConditionWaiters)->RangePair(0, 2, 1, kMaxConditionWaiters);
				222
				223	} // namespace