// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: vitus@google.com (Michael Vitus)

#include "ceres/parallel_for.h"

#include <atomic>
#include <cmath>
#include <condition_variable>
#include <mutex>
#include <numeric>
#include <random>
#include <thread>
#include <tuple>
#include <vector>

#include "ceres/context_impl.h"
#include "ceres/internal/config.h"
#include "ceres/parallel_vector_ops.h"
#include "glog/logging.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"

namespace ceres::internal {

using testing::ElementsAreArray;
using testing::UnorderedElementsAreArray;

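// The tests below exercise ParallelFor (the plain, range and thread-id
// interfaces), the guided overloads and their partitioning helpers, and the
// ParallelAssign / ParallelSetZero vector operations for varying numbers of
// threads.
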
// Tests that the parallel for loop computes the correct result for various
// numbers of threads.
TEST(ParallelFor, NumThreads) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  const int size = 16;
  std::vector<int> expected_results(size, 0);
  for (int i = 0; i < size; ++i) {
    expected_results[i] = std::sqrt(i);
  }

  for (int num_threads = 1; num_threads <= 8; ++num_threads) {
    std::vector<int> values(size, 0);
    ParallelFor(&context, 0, size, num_threads, [&values](int i) {
      values[i] = std::sqrt(i);
    });
    EXPECT_THAT(values, ElementsAreArray(expected_results));
  }
}

// Tests the parallel for loop with the range interface.
TEST(ParallelForWithRange, NumThreads) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  const int size = 16;
  std::vector<int> expected_results(size, 0);
  for (int i = 0; i < size; ++i) {
    expected_results[i] = std::sqrt(i);
  }

  for (int num_threads = 1; num_threads <= 8; ++num_threads) {
    std::vector<int> values(size, 0);
    ParallelFor(
        &context, 0, size, num_threads, [&values](std::tuple<int, int> range) {
          auto [start, end] = range;
          for (int i = start; i < end; ++i) values[i] = std::sqrt(i);
        });
    EXPECT_THAT(values, ElementsAreArray(expected_results));
  }
}

// Tests the range interface with a lower bound on the minimal block size.
TEST(ParallelForWithRange, MinimalSize) {
  ContextImpl context;
  constexpr int kNumThreads = 4;
  constexpr int kMinBlockSize = 5;
  context.EnsureMinimumThreads(kNumThreads);

  for (int size = kMinBlockSize; size <= 25; ++size) {
    std::atomic<bool> failed(false);
    ParallelFor(
        &context,
        0,
        size,
        kNumThreads,
        [&failed, kMinBlockSize](std::tuple<int, int> range) {
          auto [start, end] = range;
          if (end - start < kMinBlockSize) failed = true;
        },
        kMinBlockSize);
    EXPECT_EQ(failed, false);
  }
}

// Tests that the parallel for loop with the thread-id interface computes the
// correct result for various numbers of threads.
TEST(ParallelForWithThreadId, NumThreads) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  const int size = 16;
  std::vector<int> expected_results(size, 0);
  for (int i = 0; i < size; ++i) {
    expected_results[i] = std::sqrt(i);
  }

  for (int num_threads = 1; num_threads <= 8; ++num_threads) {
    std::vector<int> values(size, 0);
    ParallelFor(
        &context, 0, size, num_threads, [&values](int thread_id, int i) {
          values[i] = std::sqrt(i);
        });
    EXPECT_THAT(values, ElementsAreArray(expected_results));
  }
}

// Tests that nested parallel for loops do not result in a deadlock.
TEST(ParallelFor, NestedParallelForDeadlock) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  // Increment each element in the 2D matrix.
  std::vector<std::vector<int>> x(3, {1, 2, 3});
  ParallelFor(&context, 0, 3, 2, [&x, &context](int i) {
    std::vector<int>& y = x.at(i);
    ParallelFor(&context, 0, 3, 2, [&y](int j) { ++y.at(j); });
  });

  const std::vector<int> results = {2, 3, 4};
  for (const std::vector<int>& value : x) {
    EXPECT_THAT(value, ElementsAreArray(results));
  }
}

// Tests that nested parallel for loops do not result in a deadlock for the
// thread-id interface.
TEST(ParallelForWithThreadId, NestedParallelForDeadlock) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  // Increment each element in the 2D matrix.
  std::vector<std::vector<int>> x(3, {1, 2, 3});
  ParallelFor(&context, 0, 3, 2, [&x, &context](int thread_id, int i) {
    std::vector<int>& y = x.at(i);
    ParallelFor(&context, 0, 3, 2, [&y](int thread_id, int j) { ++y.at(j); });
  });

  const std::vector<int> results = {2, 3, 4};
  for (const std::vector<int>& value : x) {
    EXPECT_THAT(value, ElementsAreArray(results));
  }
}

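// Tests that concurrently running loop iterations observe distinct thread
// ids: each of the two iterations blocks until both have started, so they
// must be executed by two different threads.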
TEST(ParallelForWithThreadId, UniqueThreadIds) {
  // Skip the test if the hardware does not support more than one thread, since
  // the loop below requires two iterations to run concurrently.
  const int num_hardware_threads = std::thread::hardware_concurrency();
  if (num_hardware_threads <= 1) {
    LOG(ERROR)
        << "Test not supported, the hardware does not support threading.";
    return;
  }

  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);
  // Record the thread id seen by each of the two iterations; block until both
  // iterations have started so that they run on different threads.
  std::vector<int> x(2, -1);
  std::mutex mutex;
  std::condition_variable condition;
  int count = 0;
  ParallelFor(&context,
              0,
              2,
              2,
              [&x, &mutex, &condition, &count](int thread_id, int i) {
                std::unique_lock<std::mutex> lock(mutex);
                x[i] = thread_id;
                ++count;
                condition.notify_all();
                condition.wait(lock, [&]() { return count == 2; });
              });

  EXPECT_THAT(x, UnorderedElementsAreArray({0, 1}));
}

// Forward declaration of the brute-force partitioning helper defined below;
// it serves as a reference implementation for the partition tests.
bool BruteForcePartition(
    int* costs, int start, int end, int max_partitions, int max_cost);
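
// As exercised below, MaxPartitionCostIsFeasible(start, end,
// max_num_partitions, max_cost, cost_offset, cumulative_costs, getter,
// &partition) is expected to report whether [start, end) can be split into at
// most max_num_partitions contiguous blocks whose individual total cost does
// not exceed max_cost, storing the block boundaries in *partition when it
// can; cost_offset is the cumulative cost of everything before `start`. (The
// argument names used here are for exposition only.)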
// Basic test checking that MaxPartitionCostIsFeasible and BruteForcePartition
// agree on simple test cases.
TEST(GuidedParallelFor, MaxPartitionCostIsFeasible) {
  std::vector<int> costs, cumulative_costs, partition;
  costs = {1, 2, 3, 5, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0};
  cumulative_costs.resize(costs.size());
  std::partial_sum(costs.begin(), costs.end(), cumulative_costs.begin());
  const auto dummy_getter = [](const int v) { return v; };

  // Feasible with 3 partitions of max cost 7:
  // [1, 2, 3] [5] [0 ... 0, 7, 0, ... 0]
  EXPECT_TRUE(MaxPartitionCostIsFeasible(0,
                                         costs.size(),
                                         3,
                                         7,
                                         0,
                                         cumulative_costs.data(),
                                         dummy_getter,
                                         &partition));
  EXPECT_TRUE(BruteForcePartition(costs.data(), 0, costs.size(), 3, 7));
  // Trivially feasible: a single partition of total cost 18 suffices:
  // [1, 2, 3, 5, 0 ... 0, 7, 0, ... 0]
  EXPECT_TRUE(MaxPartitionCostIsFeasible(0,
                                         costs.size(),
                                         3,
                                         18,
                                         0,
                                         cumulative_costs.data(),
                                         dummy_getter,
                                         &partition));
  EXPECT_TRUE(BruteForcePartition(costs.data(), 0, costs.size(), 3, 18));
  // Infeasible, since there is a single item of cost 7 > 6.
  EXPECT_FALSE(MaxPartitionCostIsFeasible(0,
                                          costs.size(),
                                          3,
                                          6,
                                          0,
                                          cumulative_costs.data(),
                                          dummy_getter,
                                          &partition));
  EXPECT_FALSE(BruteForcePartition(costs.data(), 0, costs.size(), 3, 6));
  // Infeasible: any split into two partitions leaves one side with cost > 10.
  EXPECT_FALSE(MaxPartitionCostIsFeasible(0,
                                          costs.size(),
                                          2,
                                          10,
                                          0,
                                          cumulative_costs.data(),
                                          dummy_getter,
                                          &partition));
  EXPECT_FALSE(BruteForcePartition(costs.data(), 0, costs.size(), 2, 10));
}

// Randomized tests for MaxPartitionCostIsFeasible.
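// For every random cost vector, random subrange and every cost threshold, the
// result must agree with the brute-force search, and any partition reported
// as feasible must respect the threshold.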
TEST(GuidedParallelFor, MaxPartitionCostIsFeasibleRandomized) {
  std::vector<int> costs, cumulative_costs, partition;
  const auto dummy_getter = [](const int v) { return v; };

  // Random tests
  const int kNumTests = 1000;
  const int kMaxElements = 32;
  const int kMaxPartitions = 16;
  const int kMaxElCost = 8;
  std::mt19937 rng;
  std::uniform_int_distribution<int> rng_N(1, kMaxElements);
  std::uniform_int_distribution<int> rng_M(1, kMaxPartitions);
  std::uniform_int_distribution<int> rng_e(0, kMaxElCost);
  for (int t = 0; t < kNumTests; ++t) {
    const int N = rng_N(rng);
    const int M = rng_M(rng);
    int total = 0;
    costs.clear();
    for (int i = 0; i < N; ++i) {
      costs.push_back(rng_e(rng));
      total += costs.back();
    }

    cumulative_costs.resize(N);
    std::partial_sum(costs.begin(), costs.end(), cumulative_costs.begin());

    std::uniform_int_distribution<int> rng_seg(0, N - 1);
    int start = rng_seg(rng);
    int end = rng_seg(rng);
    if (start > end) std::swap(start, end);
    ++end;

    int first_admissible = 0;
    for (int threshold = 1; threshold <= total; ++threshold) {
      const bool bruteforce =
          BruteForcePartition(costs.data(), start, end, M, threshold);
      if (bruteforce && !first_admissible) {
        first_admissible = threshold;
      }
      const bool binary_search =
          MaxPartitionCostIsFeasible(start,
                                     end,
                                     M,
                                     threshold,
                                     start ? cumulative_costs[start - 1] : 0,
                                     cumulative_costs.data(),
                                     dummy_getter,
                                     &partition);
      EXPECT_EQ(bruteforce, binary_search);
      EXPECT_LE(partition.size(), M + 1);
      // check partition itself
      if (binary_search) {
        ASSERT_GT(partition.size(), 1);
        EXPECT_EQ(partition.front(), start);
        EXPECT_EQ(partition.back(), end);

        const int num_partitions = partition.size() - 1;
        EXPECT_LE(num_partitions, M);
        for (int j = 0; j < num_partitions; ++j) {
          int total = 0;
          for (int k = partition[j]; k < partition[j + 1]; ++k) {
            EXPECT_LT(k, end);
            EXPECT_GE(k, start);
            total += costs[k];
          }
          EXPECT_LE(total, threshold);
        }
      }
    }
  }
}

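// Randomized test: the partition produced by PartitionRangeForParallelFor
// must cover exactly [start, end), use at most M blocks, and contain no block
// whose total cost exceeds the smallest threshold that the brute-force search
// finds feasible.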
TEST(GuidedParallelFor, PartitionRangeForParallelFor) {
  std::vector<int> costs, cumulative_costs, partition;
  const auto dummy_getter = [](const int v) { return v; };

  // Random tests
  const int kNumTests = 1000;
  const int kMaxElements = 32;
  const int kMaxPartitions = 16;
  const int kMaxElCost = 8;
  std::mt19937 rng;
  std::uniform_int_distribution<int> rng_N(1, kMaxElements);
  std::uniform_int_distribution<int> rng_M(1, kMaxPartitions);
  std::uniform_int_distribution<int> rng_e(0, kMaxElCost);
  for (int t = 0; t < kNumTests; ++t) {
    const int N = rng_N(rng);
    const int M = rng_M(rng);
    int total = 0;
    costs.clear();
    for (int i = 0; i < N; ++i) {
      costs.push_back(rng_e(rng));
      total += costs.back();
    }

    cumulative_costs.resize(N);
    std::partial_sum(costs.begin(), costs.end(), cumulative_costs.begin());

    std::uniform_int_distribution<int> rng_seg(0, N - 1);
    int start = rng_seg(rng);
    int end = rng_seg(rng);
    if (start > end) std::swap(start, end);
    ++end;

    // Find the smallest threshold for which a partition into at most M blocks
    // is feasible.
    int first_admissible = 0;
    for (int threshold = 1; threshold <= total; ++threshold) {
      const bool bruteforce =
          BruteForcePartition(costs.data(), start, end, M, threshold);
      if (bruteforce) {
        first_admissible = threshold;
        break;
      }
    }
    EXPECT_TRUE(first_admissible != 0 || total == 0);
    partition = PartitionRangeForParallelFor(
        start, end, M, cumulative_costs.data(), dummy_getter);
    ASSERT_GT(partition.size(), 1);
    EXPECT_EQ(partition.front(), start);
    EXPECT_EQ(partition.back(), end);

    const int num_partitions = partition.size() - 1;
    EXPECT_LE(num_partitions, M);
    for (int j = 0; j < num_partitions; ++j) {
      int total = 0;
      for (int k = partition[j]; k < partition[j + 1]; ++k) {
        EXPECT_LT(k, end);
        EXPECT_GE(k, start);
        total += costs[k];
      }
      EXPECT_LE(total, first_admissible);
    }
  }
}

// Recursively checks whether the range [start, end) can be partitioned into
// at most max_partitions contiguous segments, each of total cost at most
// max_cost.
bool BruteForcePartition(
    int* costs, int start, int end, int max_partitions, int max_cost) {
  if (start == end) return true;
  if (start < end && max_partitions == 0) return false;
  int total_cost = 0;
  for (int last_curr = start + 1; last_curr <= end; ++last_curr) {
    total_cost += costs[last_curr - 1];
    if (total_cost > max_cost) break;
    if (BruteForcePartition(
            costs, last_curr, end, max_partitions - 1, max_cost))
      return true;
  }
  return false;
}
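
// For example, with costs {1, 2, 3, 5, 7}, max_partitions = 3 and
// max_cost = 7 the search above succeeds via the split [1, 2, 3] [5] [7],
// while max_cost = 6 is infeasible because the single item of cost 7 does not
// fit into any segment. (Illustrative values only; not used by the tests.)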

// Tests that the guided parallel for loop computes the correct result for
// various numbers of threads.
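// The guided overload below additionally takes an array of cumulative
// per-index costs and a getter; these appear to drive how the range is
// partitioned across threads, and must not change the computed values.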
TEST(GuidedParallelFor, NumThreads) {
  ContextImpl context;
  context.EnsureMinimumThreads(/*num_threads=*/2);

  const int size = 16;
  std::vector<int> expected_results(size, 0);
  for (int i = 0; i < size; ++i) {
    expected_results[i] = std::sqrt(i);
  }

  std::vector<int> costs, cumulative_costs;
  for (int i = 1; i <= size; ++i) {
    int cost = i * i;
    costs.push_back(cost);
    if (i == 1) {
      cumulative_costs.push_back(cost);
    } else {
      cumulative_costs.push_back(cost + cumulative_costs.back());
    }
  }

  for (int num_threads = 1; num_threads <= 8; ++num_threads) {
    std::vector<int> values(size, 0);
    ParallelFor(
        &context,
        0,
        size,
        num_threads,
        [&values](int i) { values[i] = std::sqrt(i); },
        cumulative_costs.data(),
        [](const int v) { return v; });
    EXPECT_THAT(values, ElementsAreArray(expected_results));
  }
}

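// Tests that ParallelAssign evaluates a vector expression (here the
// element-wise product of D^2 and x) into the destination and matches the
// sequential Eigen result up to floating-point round-off.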
TEST(ParallelAssign, D2MulX) {
  const int kVectorSize = 1024 * 1024;
  const int kMaxNumThreads = 8;
  const double kEpsilon = 1e-16;

  const Vector D_full = Vector::Random(kVectorSize * 2);
  const ConstVectorRef D(D_full.data() + kVectorSize, kVectorSize);
  const Vector x = Vector::Random(kVectorSize);
  const Vector y_expected = D.array().square() * x.array();
  ContextImpl context;
  context.EnsureMinimumThreads(kMaxNumThreads);

  for (int num_threads = 1; num_threads <= kMaxNumThreads; ++num_threads) {
    Vector y_observed(kVectorSize);
    ParallelAssign(
        &context, num_threads, y_observed, D.array().square() * x.array());

    // The result might not be bit-exact due to different precision in scalar
    // and vectorized code. For example, in x86 mode mingw might emit x87
    // instructions for the scalar code, which makes a bit-exact check fail.
    EXPECT_NEAR((y_expected - y_observed).squaredNorm(),
                0.,
                kEpsilon * y_expected.squaredNorm());
  }
}

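// Tests that ParallelSetZero zeroes a randomly initialized vector for various
// numbers of threads.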
TEST(ParallelAssign, SetZero) {
  const int kVectorSize = 1024 * 1024;
  const int kMaxNumThreads = 8;

  ContextImpl context;
  context.EnsureMinimumThreads(kMaxNumThreads);

  for (int num_threads = 1; num_threads <= kMaxNumThreads; ++num_threads) {
    Vector x = Vector::Random(kVectorSize);
    ParallelSetZero(&context, num_threads, x);

    CHECK_EQ(x.squaredNorm(), 0.);
  }
}

}  // namespace ceres::internal