// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "benchmark_runner.h"
#include "benchmark/benchmark.h"
#include "benchmark_api_internal.h"
#include "internal_macros.h"

#ifndef BENCHMARK_OS_WINDOWS
#ifndef BENCHMARK_OS_FUCHSIA
#include <sys/resource.h>
#endif
#include <sys/time.h>
#include <unistd.h>
#endif

#include <algorithm>
#include <atomic>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <thread>
#include <utility>

#include "check.h"
#include "colorprint.h"
#include "commandlineflags.h"
#include "complexity.h"
#include "counter.h"
#include "log.h"
#include "mutex.h"
#include "re.h"
#include "statistics.h"
#include "string_util.h"
#include "thread_manager.h"
#include "thread_timer.h"

namespace benchmark {

namespace internal {

MemoryManager* memory_manager = nullptr;

namespace {

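// Upper bound on the iteration count for a single benchmark run. Both the
// iteration predictor (PredictNumItersNeeded) and the reporting cutoff
// (ShouldReportIterationResults) clamp against this value.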
static const size_t kMaxIterations = 1000000000;

BenchmarkReporter::Run CreateRunReport(
    const benchmark::internal::BenchmarkInstance& b,
    const internal::ThreadManager::Result& results, size_t memory_iterations,
    const MemoryManager::Result& memory_result, double seconds) {
  // Create report about this benchmark run.
  BenchmarkReporter::Run report;

  report.run_name = b.name;
  report.error_occurred = results.has_error_;
  report.error_message = results.error_message_;
  report.report_label = results.report_label_;
  // This is the total iterations across all threads.
  report.iterations = results.iterations;
  report.time_unit = b.time_unit;

  if (!report.error_occurred) {
    if (b.use_manual_time) {
      report.real_accumulated_time = results.manual_time_used;
    } else {
      report.real_accumulated_time = results.real_time_used;
    }
    report.cpu_accumulated_time = results.cpu_time_used;
    report.complexity_n = results.complexity_n;
    report.complexity = b.complexity;
    report.complexity_lambda = b.complexity_lambda;
    report.statistics = b.statistics;
    report.counters = results.counters;
    if (memory_iterations > 0) {
      report.has_memory_result = true;
      report.allocs_per_iter =
          static_cast<double>(memory_result.num_allocs) / memory_iterations;
      report.max_bytes_used = memory_result.max_bytes_used;
    }

    internal::Finish(&report.counters, results.iterations, seconds, b.threads);
  }
  return report;
}

// Execute one thread of benchmark b for the specified number of iterations.
// Adds the stats collected for the thread into *total.
void RunInThread(const BenchmarkInstance* b, size_t iters, int thread_id,
                 ThreadManager* manager) {
  internal::ThreadTimer timer;
  State st = b->Run(iters, thread_id, &timer, manager);
  CHECK(st.iterations() >= st.max_iterations)
      << "Benchmark returned before State::KeepRunning() returned false!";
  {
    MutexLock l(manager->GetBenchmarkMutex());
    internal::ThreadManager::Result& results = manager->results;
    results.iterations += st.iterations();
    results.cpu_time_used += timer.cpu_time_used();
    results.real_time_used += timer.real_time_used();
    results.manual_time_used += timer.manual_time_used();
    results.complexity_n += st.complexity_length_n();
    internal::Increment(&results.counters, st.counters);
  }
  manager->NotifyThreadComplete();
}

class BenchmarkRunner {
 public:
  BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
                  std::vector<BenchmarkReporter::Run>* complexity_reports_)
      : b(b_),
        complexity_reports(*complexity_reports_),
        min_time(!IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time),
        repeats(b.repetitions != 0 ? b.repetitions
                                   : FLAGS_benchmark_repetitions),
        has_explicit_iteration_count(b.iterations != 0),
        pool(b.threads - 1),
        iters(has_explicit_iteration_count ? b.iterations : 1) {
    run_results.display_report_aggregates_only =
        (FLAGS_benchmark_report_aggregates_only ||
         FLAGS_benchmark_display_aggregates_only);
    run_results.file_report_aggregates_only =
        FLAGS_benchmark_report_aggregates_only;
    if (b.aggregation_report_mode != internal::ARM_Unspecified) {
      run_results.display_report_aggregates_only =
          (b.aggregation_report_mode &
           internal::ARM_DisplayReportAggregatesOnly);
      run_results.file_report_aggregates_only =
          (b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly);
    }

    for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
      const bool is_the_first_repetition = repetition_num == 0;
      DoOneRepetition(is_the_first_repetition);
    }

    // Calculate additional statistics
    run_results.aggregates_only = ComputeStats(run_results.non_aggregates);

    // Maybe calculate complexity report
    if ((b.complexity != oNone) && b.last_benchmark_instance) {
      auto additional_run_stats = ComputeBigO(complexity_reports);
      run_results.aggregates_only.insert(run_results.aggregates_only.end(),
                                         additional_run_stats.begin(),
                                         additional_run_stats.end());
      complexity_reports.clear();
    }
  }

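  // NOTE: this moves `run_results` out of the runner, so it should be called
  // at most once.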
  RunResults&& get_results() { return std::move(run_results); }

 private:
  RunResults run_results;

  const benchmark::internal::BenchmarkInstance& b;
  std::vector<BenchmarkReporter::Run>& complexity_reports;

  const double min_time;
  const int repeats;
  const bool has_explicit_iteration_count;

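  // Worker threads for multi-threaded benchmarks. Sized b.threads - 1 because
  // the main thread runs one instance of the benchmark inline in
  // DoNIterations().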
  std::vector<std::thread> pool;

  size_t iters;  // preserved between repetitions!
  // So only the first repetition has to find/calculate it,
  // the other repetitions will just use that precomputed iteration count.

  struct IterationResults {
    internal::ThreadManager::Result results;
    size_t iters;
    double seconds;
  };
  IterationResults DoNIterations() {
    VLOG(2) << "Running " << b.name << " for " << iters << "\n";

    std::unique_ptr<internal::ThreadManager> manager;
    manager.reset(new internal::ThreadManager(b.threads));

    // Run all but one thread in separate threads
    for (std::size_t ti = 0; ti < pool.size(); ++ti) {
      pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
                             manager.get());
    }
    // And run one thread here directly.
    // (If we were asked to run just one thread, we don't create new threads.)
    // Yes, we need to do this here *after* we start the separate threads.
    RunInThread(&b, iters, 0, manager.get());

    // The main thread has finished. Now let's wait for the other threads.
    manager->WaitForAllThreads();
    for (std::thread& thread : pool) thread.join();

    IterationResults i;
    // Acquire the measurements/counters from the manager, UNDER THE LOCK!
    {
      MutexLock l(manager->GetBenchmarkMutex());
      i.results = manager->results;
    }

    // And get rid of the manager.
    manager.reset();

    // Adjust real/manual time stats since they were reported per thread.
    i.results.real_time_used /= b.threads;
    i.results.manual_time_used /= b.threads;

    VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
            << i.results.real_time_used << "\n";

    // So for how long were we running?
    i.iters = iters;
    // Decide which elapsed time to base further decisions on: CPU time by
    // default, or manual/real time if this benchmark requested it.
    i.seconds = i.results.cpu_time_used;
    if (b.use_manual_time) {
      i.seconds = i.results.manual_time_used;
    } else if (b.use_real_time) {
      i.seconds = i.results.real_time_used;
    }

    return i;
  }

  size_t PredictNumItersNeeded(const IterationResults& i) const {
    // Estimate by how much the iteration count should be increased.
    // Note: Avoid division by zero with max(seconds, 1ns).
    double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
    // If our last run was at least 10% of FLAGS_benchmark_min_time then we
    // use the multiplier directly.
    // Otherwise we use at most 10 times expansion.
    // NOTE: When the last run was at least 10% of the min time the max
    // expansion should be 14x.
    bool is_significant = (i.seconds / min_time) > 0.1;
    multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
    if (multiplier <= 1.0) multiplier = 2.0;

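    // Worked example (hypothetical numbers): with min_time = 0.5s, if the
    // last run of 1000 iterations took 0.01s, the raw multiplier is
    // 0.5 * 1.4 / 0.01 = 70x; since 0.01s is only 2% of min_time (below the
    // 10% significance cutoff), the expansion is capped at 10x, giving a
    // next_iters of 10000.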
    // So what seems to be the sufficiently-large iteration count? Round up.
    const size_t max_next_iters =
        0.5 + std::max(multiplier * i.iters, i.iters + 1.0);
    // But we do have *some* sanity limits, though.
    const size_t next_iters = std::min(max_next_iters, kMaxIterations);

    VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
    return next_iters;  // round up before conversion to integer.
  }

  bool ShouldReportIterationResults(const IterationResults& i) const {
    // Determine if this run should be reported:
    // either it has run for a sufficient amount of time,
    // or an error was reported.
    return i.results.has_error_ ||
           i.iters >= kMaxIterations ||  // Too many iterations already.
           i.seconds >= min_time ||      // The elapsed time is large enough.
           // CPU time is specified but the elapsed real time greatly exceeds
           // the minimum time.
           // Note that user-provided timers are exempt from this sanity check.
           ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time);
  }

  void DoOneRepetition(bool is_the_first_repetition) {
    IterationResults i;

    // We *may* be gradually increasing the length (iteration count)
    // of the benchmark until we decide the results are significant.
    // And once we do, we report those last results and exit.
    // Please do note that if there are repetitions, the iteration count is
    // *only* calculated for the *first* repetition, and the other repetitions
    // simply reuse that precomputed iteration count.
    for (;;) {
      i = DoNIterations();

      // Do we consider the results to be significant?
      // If we are doing repetitions, and the first repetition was already
      // done, it has calculated the correct iteration count, so we have just
      // run that very iteration count. No need to calculate anything; just
      // report.
      // Else, the normal rules apply.
      const bool results_are_significant = !is_the_first_repetition ||
                                           has_explicit_iteration_count ||
                                           ShouldReportIterationResults(i);

      if (results_are_significant) break;  // Good, let's report them!

      // Nope, bad iteration. Let's re-estimate the hopefully-sufficient
      // iteration count, and run the benchmark again...

      iters = PredictNumItersNeeded(i);
      assert(iters > i.iters &&
             "if we did more iterations than we want to do the next time, "
             "then we should have accepted the current iteration run.");
    }

    // Oh, one last thing, we need to also produce the 'memory measurements'...
    MemoryManager::Result memory_result;
    size_t memory_iterations = 0;
    if (memory_manager != nullptr) {
      // Only run a few iterations to reduce the impact of one-time
      // allocations in benchmarks that are not properly managed.
      memory_iterations = std::min<size_t>(16, iters);
      memory_manager->Start();
      std::unique_ptr<internal::ThreadManager> manager;
      manager.reset(new internal::ThreadManager(1));
      RunInThread(&b, memory_iterations, 0, manager.get());
      manager->WaitForAllThreads();
      manager.reset();

      memory_manager->Stop(&memory_result);
    }

    // Ok, now actually report.
    BenchmarkReporter::Run report = CreateRunReport(
        b, i.results, memory_iterations, memory_result, i.seconds);

    if (!report.error_occurred && b.complexity != oNone)
      complexity_reports.push_back(report);

    run_results.non_aggregates.push_back(report);
  }
};

}  // end namespace

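// Runs a single benchmark instance (all repetitions) and returns its results.
// A sketch of a call site, assuming a populated BenchmarkInstance `instance`
// (a hypothetical variable; the actual driver loop lives elsewhere in the
// library):
//
//   std::vector<BenchmarkReporter::Run> complexity_reports;
//   RunResults results = RunBenchmark(instance, &complexity_reports);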
RunResults RunBenchmark(
    const benchmark::internal::BenchmarkInstance& b,
    std::vector<BenchmarkReporter::Run>* complexity_reports) {
  internal::BenchmarkRunner r(b, complexity_reports);
  return r.get_results();
}

}  // end namespace internal

}  // end namespace benchmark