// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: vitus@google.com (Michael Vitus)

#include "ceres/parallel_for.h"

#include <atomic>
#include <cmath>
#include <condition_variable>
#include <mutex>
#include <numeric>
#include <random>
#include <thread>
#include <tuple>
#include <vector>

#include "ceres/context_impl.h"
#include "ceres/internal/config.h"
#include "ceres/parallel_vector_ops.h"
#include "glog/logging.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

namespace ceres::internal {

using testing::ElementsAreArray;
using testing::UnorderedElementsAreArray;

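// The tests below exercise ParallelFor (the plain, range and thread-id
// interfaces), the guided overloads and their partitioning helpers, and the
// ParallelAssign / ParallelSetZero vector operations for varying numbers of
// threads.
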
// Tests that the parallel for loop computes the correct result for various
// numbers of threads.
TEST(ParallelFor, NumThreads) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  const int size = 16;
  std::vector<int> expected_results(size, 0);
  for (int i = 0; i < size; ++i) {
    expected_results[i] = std::sqrt(i);
  }

  for (int num_threads = 1; num_threads <= 8; ++num_threads) {
    std::vector<int> values(size, 0);
    ParallelFor(&context, 0, size, num_threads, [&values](int i) {
      values[i] = std::sqrt(i);
    });
    EXPECT_THAT(values, ElementsAreArray(expected_results));
  }
}

// Tests the parallel for loop with the range interface.
TEST(ParallelForWithRange, NumThreads) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  const int size = 16;
  std::vector<int> expected_results(size, 0);
  for (int i = 0; i < size; ++i) {
    expected_results[i] = std::sqrt(i);
  }

  for (int num_threads = 1; num_threads <= 8; ++num_threads) {
    std::vector<int> values(size, 0);
    ParallelFor(
        &context, 0, size, num_threads, [&values](std::tuple<int, int> range) {
          auto [start, end] = range;
          for (int i = start; i < end; ++i) values[i] = std::sqrt(i);
        });
    EXPECT_THAT(values, ElementsAreArray(expected_results));
  }
}

// Tests the range interface with a lower bound on the minimal block size.
TEST(ParallelForWithRange, MinimalSize) {
  ContextImpl context;
  constexpr int kNumThreads = 4;
  constexpr int kMinBlockSize = 5;
  context.EnsureMinimumThreads(kNumThreads);

  for (int size = kMinBlockSize; size <= 25; ++size) {
    std::atomic<bool> failed(false);
    ParallelFor(
        &context,
        0,
        size,
        kNumThreads,
        [&failed, kMinBlockSize](std::tuple<int, int> range) {
          auto [start, end] = range;
          if (end - start < kMinBlockSize) failed = true;
        },
        kMinBlockSize);
    EXPECT_EQ(failed, false);
  }
}

// Tests that the parallel for loop with the thread-id interface computes the
// correct result for various numbers of threads.
TEST(ParallelForWithThreadId, NumThreads) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  const int size = 16;
  std::vector<int> expected_results(size, 0);
  for (int i = 0; i < size; ++i) {
    expected_results[i] = std::sqrt(i);
  }

  for (int num_threads = 1; num_threads <= 8; ++num_threads) {
    std::vector<int> values(size, 0);
    ParallelFor(
        &context, 0, size, num_threads, [&values](int thread_id, int i) {
          values[i] = std::sqrt(i);
        });
    EXPECT_THAT(values, ElementsAreArray(expected_results));
  }
}

// Tests that nested parallel for loops do not result in a deadlock.
TEST(ParallelFor, NestedParallelForDeadlock) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  // Increment each element in the 2D matrix.
  std::vector<std::vector<int>> x(3, {1, 2, 3});
  ParallelFor(&context, 0, 3, 2, [&x, &context](int i) {
    std::vector<int>& y = x.at(i);
    ParallelFor(&context, 0, 3, 2, [&y](int j) { ++y.at(j); });
  });

  const std::vector<int> results = {2, 3, 4};
  for (const std::vector<int>& value : x) {
    EXPECT_THAT(value, ElementsAreArray(results));
  }
}

// Tests that nested parallel for loops do not result in a deadlock for the
// thread-id interface.
TEST(ParallelForWithThreadId, NestedParallelForDeadlock) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  // Increment each element in the 2D matrix.
  std::vector<std::vector<int>> x(3, {1, 2, 3});
  ParallelFor(&context, 0, 3, 2, [&x, &context](int thread_id, int i) {
    std::vector<int>& y = x.at(i);
    ParallelFor(&context, 0, 3, 2, [&y](int thread_id, int j) { ++y.at(j); });
  });

  const std::vector<int> results = {2, 3, 4};
  for (const std::vector<int>& value : x) {
    EXPECT_THAT(value, ElementsAreArray(results));
  }
}

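// Tests that concurrently running loop iterations observe distinct thread
// ids: each of the two iterations blocks until both have started, so they
// must be executed by two different threads.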
TEST(ParallelForWithThreadId, UniqueThreadIds) {
  // Skip the test if the hardware does not support more than one thread, since
  // the loop below requires two iterations to run concurrently.
  const int num_hardware_threads = std::thread::hardware_concurrency();
  if (num_hardware_threads <= 1) {
    LOG(ERROR)
        << "Test not supported, the hardware does not support threading.";
    return;
  }

  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);
  // Record the thread id seen by each of the two iterations; block until both
  // iterations have started so that they run on different threads.
  std::vector<int> x(2, -1);
  std::mutex mutex;
  std::condition_variable condition;
  int count = 0;
  ParallelFor(&context,
              0,
              2,
              2,
              [&x, &mutex, &condition, &count](int thread_id, int i) {
                std::unique_lock<std::mutex> lock(mutex);
                x[i] = thread_id;
                ++count;
                condition.notify_all();
                condition.wait(lock, [&]() { return count == 2; });
              });

  EXPECT_THAT(x, UnorderedElementsAreArray({0, 1}));
}

// Forward declaration of the brute-force partitioning helper defined below;
// it serves as a reference implementation for the partition tests.
bool BruteForcePartition(
    int* costs, int start, int end, int max_partitions, int max_cost);
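
// As exercised below, MaxPartitionCostIsFeasible(start, end,
// max_num_partitions, max_cost, cost_offset, cumulative_costs, getter,
// &partition) is expected to report whether [start, end) can be split into at
// most max_num_partitions contiguous blocks whose individual total cost does
// not exceed max_cost, storing the block boundaries in *partition when it
// can; cost_offset is the cumulative cost of everything before `start`. (The
// argument names used here are for exposition only.)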
// Basic test checking that MaxPartitionCostIsFeasible and BruteForcePartition
// agree on simple test cases.
TEST(GuidedParallelFor, MaxPartitionCostIsFeasible) {
  std::vector<int> costs, cumulative_costs, partition;
  costs = {1, 2, 3, 5, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0};
  cumulative_costs.resize(costs.size());
  std::partial_sum(costs.begin(), costs.end(), cumulative_costs.begin());
  const auto dummy_getter = [](const int v) { return v; };

  // Feasible with 3 partitions of max cost 7:
  // [1, 2, 3] [5] [0 ... 0, 7, 0, ... 0]
  EXPECT_TRUE(MaxPartitionCostIsFeasible(0,
                                         costs.size(),
                                         3,
                                         7,
                                         0,
                                         cumulative_costs.data(),
                                         dummy_getter,
                                         &partition));
  EXPECT_TRUE(BruteForcePartition(costs.data(), 0, costs.size(), 3, 7));
  // Trivially feasible: a single partition of total cost 18 suffices:
  // [1, 2, 3, 5, 0 ... 0, 7, 0, ... 0]
  EXPECT_TRUE(MaxPartitionCostIsFeasible(0,
                                         costs.size(),
                                         3,
                                         18,
                                         0,
                                         cumulative_costs.data(),
                                         dummy_getter,
                                         &partition));
  EXPECT_TRUE(BruteForcePartition(costs.data(), 0, costs.size(), 3, 18));
  // Infeasible, since there is a single item of cost 7 > 6.
  EXPECT_FALSE(MaxPartitionCostIsFeasible(0,
                                          costs.size(),
                                          3,
                                          6,
                                          0,
                                          cumulative_costs.data(),
                                          dummy_getter,
                                          &partition));
  EXPECT_FALSE(BruteForcePartition(costs.data(), 0, costs.size(), 3, 6));
  // Infeasible: any split into two partitions leaves one side with cost > 10.
  EXPECT_FALSE(MaxPartitionCostIsFeasible(0,
                                          costs.size(),
                                          2,
                                          10,
                                          0,
                                          cumulative_costs.data(),
                                          dummy_getter,
                                          &partition));
  EXPECT_FALSE(BruteForcePartition(costs.data(), 0, costs.size(), 2, 10));
}

// Randomized tests for MaxPartitionCostIsFeasible.
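// For every random cost vector, random subrange and every cost threshold, the
// result must agree with the brute-force search, and any partition reported
// as feasible must respect the threshold.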
TEST(GuidedParallelFor, MaxPartitionCostIsFeasibleRandomized) {
  std::vector<int> costs, cumulative_costs, partition;
  const auto dummy_getter = [](const int v) { return v; };

  // Random tests
  const int kNumTests = 1000;
  const int kMaxElements = 32;
  const int kMaxPartitions = 16;
  const int kMaxElCost = 8;
  std::mt19937 rng;
  std::uniform_int_distribution<int> rng_N(1, kMaxElements);
  std::uniform_int_distribution<int> rng_M(1, kMaxPartitions);
  std::uniform_int_distribution<int> rng_e(0, kMaxElCost);
  for (int t = 0; t < kNumTests; ++t) {
    const int N = rng_N(rng);
    const int M = rng_M(rng);
    int total = 0;
    costs.clear();
    for (int i = 0; i < N; ++i) {
      costs.push_back(rng_e(rng));
      total += costs.back();
    }

    cumulative_costs.resize(N);
    std::partial_sum(costs.begin(), costs.end(), cumulative_costs.begin());

    std::uniform_int_distribution<int> rng_seg(0, N - 1);
    int start = rng_seg(rng);
    int end = rng_seg(rng);
    if (start > end) std::swap(start, end);
    ++end;

    int first_admissible = 0;
    for (int threshold = 1; threshold <= total; ++threshold) {
      const bool bruteforce =
          BruteForcePartition(costs.data(), start, end, M, threshold);
      if (bruteforce && !first_admissible) {
        first_admissible = threshold;
      }
      const bool binary_search =
          MaxPartitionCostIsFeasible(start,
                                     end,
                                     M,
                                     threshold,
                                     start ? cumulative_costs[start - 1] : 0,
                                     cumulative_costs.data(),
                                     dummy_getter,
                                     &partition);
      EXPECT_EQ(bruteforce, binary_search);
      EXPECT_LE(partition.size(), M + 1);
      // check partition itself
      if (binary_search) {
        ASSERT_GT(partition.size(), 1);
        EXPECT_EQ(partition.front(), start);
        EXPECT_EQ(partition.back(), end);

        const int num_partitions = partition.size() - 1;
        EXPECT_LE(num_partitions, M);
        for (int j = 0; j < num_partitions; ++j) {
          int total = 0;
          for (int k = partition[j]; k < partition[j + 1]; ++k) {
            EXPECT_LT(k, end);
            EXPECT_GE(k, start);
            total += costs[k];
          }
          EXPECT_LE(total, threshold);
        }
      }
    }
  }
}

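// Randomized test: the partition produced by PartitionRangeForParallelFor
// must cover exactly [start, end), use at most M blocks, and contain no block
// whose total cost exceeds the smallest threshold that the brute-force search
// finds feasible.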
TEST(GuidedParallelFor, PartitionRangeForParallelFor) {
  std::vector<int> costs, cumulative_costs, partition;
  const auto dummy_getter = [](const int v) { return v; };

  // Random tests
  const int kNumTests = 1000;
  const int kMaxElements = 32;
  const int kMaxPartitions = 16;
  const int kMaxElCost = 8;
  std::mt19937 rng;
  std::uniform_int_distribution<int> rng_N(1, kMaxElements);
  std::uniform_int_distribution<int> rng_M(1, kMaxPartitions);
  std::uniform_int_distribution<int> rng_e(0, kMaxElCost);
  for (int t = 0; t < kNumTests; ++t) {
    const int N = rng_N(rng);
    const int M = rng_M(rng);
    int total = 0;
    costs.clear();
    for (int i = 0; i < N; ++i) {
      costs.push_back(rng_e(rng));
      total += costs.back();
    }

    cumulative_costs.resize(N);
    std::partial_sum(costs.begin(), costs.end(), cumulative_costs.begin());

    std::uniform_int_distribution<int> rng_seg(0, N - 1);
    int start = rng_seg(rng);
    int end = rng_seg(rng);
    if (start > end) std::swap(start, end);
    ++end;

    // Find the smallest threshold for which a partition into at most M blocks
    // is feasible.
    int first_admissible = 0;
    for (int threshold = 1; threshold <= total; ++threshold) {
      const bool bruteforce =
          BruteForcePartition(costs.data(), start, end, M, threshold);
      if (bruteforce) {
        first_admissible = threshold;
        break;
      }
    }
    EXPECT_TRUE(first_admissible != 0 || total == 0);
    partition = PartitionRangeForParallelFor(
        start, end, M, cumulative_costs.data(), dummy_getter);
    ASSERT_GT(partition.size(), 1);
    EXPECT_EQ(partition.front(), start);
    EXPECT_EQ(partition.back(), end);

    const int num_partitions = partition.size() - 1;
    EXPECT_LE(num_partitions, M);
    for (int j = 0; j < num_partitions; ++j) {
      int total = 0;
      for (int k = partition[j]; k < partition[j + 1]; ++k) {
        EXPECT_LT(k, end);
        EXPECT_GE(k, start);
        total += costs[k];
      }
      EXPECT_LE(total, first_admissible);
    }
  }
}

// Recursively checks whether the range [start, end) can be partitioned into
// at most max_partitions contiguous segments, each of total cost at most
// max_cost.
bool BruteForcePartition(
    int* costs, int start, int end, int max_partitions, int max_cost) {
  if (start == end) return true;
  if (start < end && max_partitions == 0) return false;
  int total_cost = 0;
  for (int last_curr = start + 1; last_curr <= end; ++last_curr) {
    total_cost += costs[last_curr - 1];
    if (total_cost > max_cost) break;
    if (BruteForcePartition(
            costs, last_curr, end, max_partitions - 1, max_cost))
      return true;
  }
  return false;
}
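
// For example, with costs {1, 2, 3, 5, 7}, max_partitions = 3 and
// max_cost = 7 the search above succeeds via the split [1, 2, 3] [5] [7],
// while max_cost = 6 is infeasible because the single item of cost 7 does not
// fit into any segment. (Illustrative values only; not used by the tests.)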

// Tests that the guided parallel for loop computes the correct result for
// various numbers of threads.
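// The guided overload below additionally takes an array of cumulative
// per-index costs and a getter; these appear to drive how the range is
// partitioned across threads, and must not change the computed values.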
TEST(GuidedParallelFor, NumThreads) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  const int size = 16;
  std::vector<int> expected_results(size, 0);
  for (int i = 0; i < size; ++i) {
    expected_results[i] = std::sqrt(i);
  }

  std::vector<int> costs, cumulative_costs;
  for (int i = 1; i <= size; ++i) {
    int cost = i * i;
    costs.push_back(cost);
    if (i == 1) {
      cumulative_costs.push_back(cost);
    } else {
      cumulative_costs.push_back(cost + cumulative_costs.back());
    }
  }

  for (int num_threads = 1; num_threads <= 8; ++num_threads) {
    std::vector<int> values(size, 0);
    ParallelFor(
        &context,
        0,
        size,
        num_threads,
        [&values](int i) { values[i] = std::sqrt(i); },
        cumulative_costs.data(),
        [](const int v) { return v; });
    EXPECT_THAT(values, ElementsAreArray(expected_results));
  }
}

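// Tests that ParallelAssign evaluates a vector expression (here the
// element-wise product of D^2 and x) into the destination and matches the
// sequential Eigen result up to floating-point round-off.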
TEST(ParallelAssign, D2MulX) {
  const int kVectorSize = 1024 * 1024;
  const int kMaxNumThreads = 8;
  const double kEpsilon = 1e-16;

  const Vector D_full = Vector::Random(kVectorSize * 2);
  const ConstVectorRef D(D_full.data() + kVectorSize, kVectorSize);
  const Vector x = Vector::Random(kVectorSize);
  const Vector y_expected = D.array().square() * x.array();
  ContextImpl context;
  context.EnsureMinimumThreads(kMaxNumThreads);

  for (int num_threads = 1; num_threads <= kMaxNumThreads; ++num_threads) {
    Vector y_observed(kVectorSize);
    ParallelAssign(
        &context, num_threads, y_observed, D.array().square() * x.array());

    // The result might not be bit-exact due to different precision in scalar
    // and vectorized code. For example, in x86 mode mingw might emit x87
    // instructions for the scalar code, which makes a bit-exact check fail.
    EXPECT_NEAR((y_expected - y_observed).squaredNorm(),
                0.,
                kEpsilon * y_expected.squaredNorm());
  }
}

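// Tests that ParallelSetZero zeroes a randomly initialized vector for various
// numbers of threads.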
TEST(ParallelAssign, SetZero) {
  const int kVectorSize = 1024 * 1024;
  const int kMaxNumThreads = 8;

  ContextImpl context;
  context.EnsureMinimumThreads(kMaxNumThreads);

  for (int num_threads = 1; num_threads <= kMaxNumThreads; ++num_threads) {
    Vector x = Vector::Random(kVectorSize);
    ParallelSetZero(&context, num_threads, x);

    CHECK_EQ(x.squaredNorm(), 0.);
  }
}

}  // namespace ceres::internal