blob: bd02439b500cde00927e00403f9335c73ba8c8c1 [file] [log] [blame]
Austin Schuh70cc9552019-01-21 19:46:48 -08001// Ceres Solver - A fast non-linear least squares minimizer
Austin Schuh3de38b02024-06-25 18:25:10 -07002// Copyright 2023 Google Inc. All rights reserved.
Austin Schuh70cc9552019-01-21 19:46:48 -08003// http://ceres-solver.org/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7//
8// * Redistributions of source code must retain the above copyright notice,
9// this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above copyright notice,
11// this list of conditions and the following disclaimer in the documentation
12// and/or other materials provided with the distribution.
13// * Neither the name of Google Inc. nor the names of its contributors may be
14// used to endorse or promote products derived from this software without
15// specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27// POSSIBILITY OF SUCH DAMAGE.
28//
29// Author: sameeragarwal@google.com (Sameer Agarwal)
30
Austin Schuh70cc9552019-01-21 19:46:48 -080031#include <algorithm>
32#include <cstring>
Austin Schuh3de38b02024-06-25 18:25:10 -070033#include <memory>
Austin Schuh70cc9552019-01-21 19:46:48 -080034#include <vector>
Austin Schuh1d1e6ea2020-12-23 21:56:30 -080035
Austin Schuh70cc9552019-01-21 19:46:48 -080036#include "ceres/block_sparse_matrix.h"
37#include "ceres/block_structure.h"
38#include "ceres/internal/eigen.h"
Austin Schuh3de38b02024-06-25 18:25:10 -070039#include "ceres/parallel_for.h"
40#include "ceres/partition_range_for_parallel_for.h"
Austin Schuh1d1e6ea2020-12-23 21:56:30 -080041#include "ceres/partitioned_matrix_view.h"
Austin Schuh70cc9552019-01-21 19:46:48 -080042#include "ceres/small_blas.h"
43#include "glog/logging.h"
44
Austin Schuh3de38b02024-06-25 18:25:10 -070045namespace ceres::internal {
Austin Schuh70cc9552019-01-21 19:46:48 -080046
47template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
48PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
Austin Schuh3de38b02024-06-25 18:25:10 -070049 PartitionedMatrixView(const LinearSolver::Options& options,
50 const BlockSparseMatrix& matrix)
51
52 : options_(options), matrix_(matrix) {
Austin Schuh70cc9552019-01-21 19:46:48 -080053 const CompressedRowBlockStructure* bs = matrix_.block_structure();
54 CHECK(bs != nullptr);
55
Austin Schuh3de38b02024-06-25 18:25:10 -070056 num_col_blocks_e_ = options_.elimination_groups[0];
Austin Schuh70cc9552019-01-21 19:46:48 -080057 num_col_blocks_f_ = bs->cols.size() - num_col_blocks_e_;
58
59 // Compute the number of row blocks in E. The number of row blocks
60 // in E maybe less than the number of row blocks in the input matrix
61 // as some of the row blocks at the bottom may not have any
62 // e_blocks. For a definition of what an e_block is, please see
Austin Schuh3de38b02024-06-25 18:25:10 -070063 // schur_complement_solver.h
Austin Schuh70cc9552019-01-21 19:46:48 -080064 num_row_blocks_e_ = 0;
Austin Schuh3de38b02024-06-25 18:25:10 -070065 for (const auto& row : bs->rows) {
66 const std::vector<Cell>& cells = row.cells;
Austin Schuh70cc9552019-01-21 19:46:48 -080067 if (cells[0].block_id < num_col_blocks_e_) {
68 ++num_row_blocks_e_;
69 }
70 }
71
72 // Compute the number of columns in E and F.
73 num_cols_e_ = 0;
74 num_cols_f_ = 0;
75
76 for (int c = 0; c < bs->cols.size(); ++c) {
77 const Block& block = bs->cols[c];
78 if (c < num_col_blocks_e_) {
79 num_cols_e_ += block.size;
80 } else {
81 num_cols_f_ += block.size;
82 }
83 }
84
85 CHECK_EQ(num_cols_e_ + num_cols_f_, matrix_.num_cols());
Austin Schuh70cc9552019-01-21 19:46:48 -080086
Austin Schuh3de38b02024-06-25 18:25:10 -070087 auto transpose_bs = matrix_.transpose_block_structure();
88 const int num_threads = options_.num_threads;
89 if (transpose_bs != nullptr && num_threads > 1) {
90 int kMaxPartitions = num_threads * 4;
91 e_cols_partition_ = PartitionRangeForParallelFor(
92 0,
93 num_col_blocks_e_,
94 kMaxPartitions,
95 transpose_bs->rows.data(),
96 [](const CompressedRow& row) { return row.cumulative_nnz; });
97
98 f_cols_partition_ = PartitionRangeForParallelFor(
99 num_col_blocks_e_,
100 num_col_blocks_e_ + num_col_blocks_f_,
101 kMaxPartitions,
102 transpose_bs->rows.data(),
103 [](const CompressedRow& row) { return row.cumulative_nnz; });
104 }
105}
Austin Schuh70cc9552019-01-21 19:46:48 -0800106
107// The next four methods don't seem to be particularly cache
108// friendly. This is an artifact of how the BlockStructure of the
109// input matrix is constructed. These methods will benefit from
110// multithreading as well as improved data layout.
111
112template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800113void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
Austin Schuh3de38b02024-06-25 18:25:10 -0700114 RightMultiplyAndAccumulateE(const double* x, double* y) const {
Austin Schuh70cc9552019-01-21 19:46:48 -0800115 // Iterate over the first num_row_blocks_e_ row blocks, and multiply
116 // by the first cell in each row block.
Austin Schuh3de38b02024-06-25 18:25:10 -0700117 auto bs = matrix_.block_structure();
Austin Schuh70cc9552019-01-21 19:46:48 -0800118 const double* values = matrix_.values();
Austin Schuh3de38b02024-06-25 18:25:10 -0700119 ParallelFor(options_.context,
120 0,
121 num_row_blocks_e_,
122 options_.num_threads,
123 [values, bs, x, y](int row_block_id) {
124 const Cell& cell = bs->rows[row_block_id].cells[0];
125 const int row_block_pos = bs->rows[row_block_id].block.position;
126 const int row_block_size = bs->rows[row_block_id].block.size;
127 const int col_block_id = cell.block_id;
128 const int col_block_pos = bs->cols[col_block_id].position;
129 const int col_block_size = bs->cols[col_block_id].size;
130 // clang-format off
131 MatrixVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
132 values + cell.position, row_block_size, col_block_size,
133 x + col_block_pos,
134 y + row_block_pos);
135 // clang-format on
136 });
Austin Schuh70cc9552019-01-21 19:46:48 -0800137}
138
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    RightMultiplyAndAccumulateF(const double* x, double* y) const {
  // y += F * x, where x is indexed in F's column space (i.e. shifted
  // left by num_cols_e).
  //
  // Iterate over row blocks, and if the row block is in E, then
  // multiply by all the cells except the first one which is of type
  // E. If the row block is not in E (i.e its in the bottom
  // num_row_blocks - num_row_blocks_e row blocks), then all the cells
  // are of type F and multiply by them all.
  const CompressedRowBlockStructure* bs = matrix_.block_structure();
  const int num_row_blocks = bs->rows.size();
  // Local copy so the lambdas below do not capture `this`.
  const int num_cols_e = num_cols_e_;
  const double* values = matrix_.values();
  // Row blocks with an e_block: skip cell 0 (the e_block); the remaining
  // cells have the compile-time F block size.
  ParallelFor(options_.context,
              0,
              num_row_blocks_e_,
              options_.num_threads,
              [values, bs, num_cols_e, x, y](int row_block_id) {
                const int row_block_pos = bs->rows[row_block_id].block.position;
                const int row_block_size = bs->rows[row_block_id].block.size;
                const auto& cells = bs->rows[row_block_id].cells;
                for (int c = 1; c < cells.size(); ++c) {
                  const int col_block_id = cells[c].block_id;
                  const int col_block_pos = bs->cols[col_block_id].position;
                  const int col_block_size = bs->cols[col_block_id].size;
                  // clang-format off
                  MatrixVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
                      values + cells[c].position, row_block_size, col_block_size,
                      x + col_block_pos - num_cols_e,
                      y + row_block_pos);
                  // clang-format on
                }
              });
  // Row blocks without e_blocks: every cell is an f_block, but the block
  // sizes are not known at compile time, hence Eigen::Dynamic.
  ParallelFor(options_.context,
              num_row_blocks_e_,
              num_row_blocks,
              options_.num_threads,
              [values, bs, num_cols_e, x, y](int row_block_id) {
                const int row_block_pos = bs->rows[row_block_id].block.position;
                const int row_block_size = bs->rows[row_block_id].block.size;
                const auto& cells = bs->rows[row_block_id].cells;
                for (const auto& cell : cells) {
                  const int col_block_id = cell.block_id;
                  const int col_block_pos = bs->cols[col_block_id].position;
                  const int col_block_size = bs->cols[col_block_id].size;
                  // clang-format off
                  MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
                      values + cell.position, row_block_size, col_block_size,
                      x + col_block_pos - num_cols_e,
                      y + row_block_pos);
                  // clang-format on
                }
              });
}
Austin Schuh70cc9552019-01-21 19:46:48 -0800192
Austin Schuh3de38b02024-06-25 18:25:10 -0700193template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
194void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
195 LeftMultiplyAndAccumulateE(const double* x, double* y) const {
196 if (!num_col_blocks_e_) return;
197 if (!num_row_blocks_e_) return;
198 if (options_.num_threads == 1) {
199 LeftMultiplyAndAccumulateESingleThreaded(x, y);
200 } else {
201 CHECK(options_.context != nullptr);
202 LeftMultiplyAndAccumulateEMultiThreaded(x, y);
Austin Schuh70cc9552019-01-21 19:46:48 -0800203 }
204}
205
206template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800207void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
Austin Schuh3de38b02024-06-25 18:25:10 -0700208 LeftMultiplyAndAccumulateESingleThreaded(const double* x, double* y) const {
Austin Schuh70cc9552019-01-21 19:46:48 -0800209 const CompressedRowBlockStructure* bs = matrix_.block_structure();
210
211 // Iterate over the first num_row_blocks_e_ row blocks, and multiply
212 // by the first cell in each row block.
213 const double* values = matrix_.values();
214 for (int r = 0; r < num_row_blocks_e_; ++r) {
215 const Cell& cell = bs->rows[r].cells[0];
216 const int row_block_pos = bs->rows[r].block.position;
217 const int row_block_size = bs->rows[r].block.size;
218 const int col_block_id = cell.block_id;
219 const int col_block_pos = bs->cols[col_block_id].position;
220 const int col_block_size = bs->cols[col_block_id].size;
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800221 // clang-format off
Austin Schuh70cc9552019-01-21 19:46:48 -0800222 MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
223 values + cell.position, row_block_size, col_block_size,
224 x + row_block_pos,
225 y + col_block_pos);
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800226 // clang-format on
Austin Schuh70cc9552019-01-21 19:46:48 -0800227 }
228}
229
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    LeftMultiplyAndAccumulateEMultiThreaded(const double* x, double* y) const {
  // y += E' * x, parallelized over the column blocks of E using the
  // transposed block structure (a row of transpose_bs corresponds to a
  // column of the original matrix), so each output range is written by
  // exactly one worker and no synchronization is needed.
  auto transpose_bs = matrix_.transpose_block_structure();
  CHECK(transpose_bs != nullptr);

  // Local copies of class members in order to avoid capturing pointer to the
  // whole object in lambda function
  auto values = matrix_.values();
  const int num_row_blocks_e = num_row_blocks_e_;
  ParallelFor(
      options_.context,
      0,
      num_col_blocks_e_,
      options_.num_threads,
      [values, transpose_bs, num_row_blocks_e, x, y](int row_block_id) {
        // row_block_id indexes a column block of the original matrix.
        int row_block_pos = transpose_bs->rows[row_block_id].block.position;
        int row_block_size = transpose_bs->rows[row_block_id].block.size;
        auto& cells = transpose_bs->rows[row_block_id].cells;

        for (auto& cell : cells) {
          const int col_block_id = cell.block_id;
          const int col_block_size = transpose_bs->cols[col_block_id].size;
          const int col_block_pos = transpose_bs->cols[col_block_id].position;
          // Only row blocks below num_row_blocks_e contain e_blocks; the
          // early break relies on cells being ordered by block_id
          // (presumably guaranteed by the transpose structure — the same
          // pattern is used by the F variant below).
          if (col_block_id >= num_row_blocks_e) break;
          MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
              values + cell.position,
              col_block_size,
              row_block_size,
              x + col_block_pos,
              y + row_block_pos);
        }
      },
      e_cols_partition());
}
265
266template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
267void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
268 LeftMultiplyAndAccumulateF(const double* x, double* y) const {
269 if (!num_col_blocks_f_) return;
270 if (options_.num_threads == 1) {
271 LeftMultiplyAndAccumulateFSingleThreaded(x, y);
272 } else {
273 CHECK(options_.context != nullptr);
274 LeftMultiplyAndAccumulateFMultiThreaded(x, y);
275 }
276}
277
278template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
279void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
280 LeftMultiplyAndAccumulateFSingleThreaded(const double* x, double* y) const {
Austin Schuh70cc9552019-01-21 19:46:48 -0800281 const CompressedRowBlockStructure* bs = matrix_.block_structure();
282
283 // Iterate over row blocks, and if the row block is in E, then
284 // multiply by all the cells except the first one which is of type
285 // E. If the row block is not in E (i.e its in the bottom
286 // num_row_blocks - num_row_blocks_e row blocks), then all the cells
287 // are of type F and multiply by them all.
288 const double* values = matrix_.values();
289 for (int r = 0; r < num_row_blocks_e_; ++r) {
290 const int row_block_pos = bs->rows[r].block.position;
291 const int row_block_size = bs->rows[r].block.size;
292 const std::vector<Cell>& cells = bs->rows[r].cells;
293 for (int c = 1; c < cells.size(); ++c) {
294 const int col_block_id = cells[c].block_id;
295 const int col_block_pos = bs->cols[col_block_id].position;
296 const int col_block_size = bs->cols[col_block_id].size;
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800297 // clang-format off
Austin Schuh70cc9552019-01-21 19:46:48 -0800298 MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
299 values + cells[c].position, row_block_size, col_block_size,
300 x + row_block_pos,
301 y + col_block_pos - num_cols_e_);
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800302 // clang-format on
Austin Schuh70cc9552019-01-21 19:46:48 -0800303 }
304 }
305
306 for (int r = num_row_blocks_e_; r < bs->rows.size(); ++r) {
307 const int row_block_pos = bs->rows[r].block.position;
308 const int row_block_size = bs->rows[r].block.size;
309 const std::vector<Cell>& cells = bs->rows[r].cells;
Austin Schuh3de38b02024-06-25 18:25:10 -0700310 for (const auto& cell : cells) {
311 const int col_block_id = cell.block_id;
Austin Schuh70cc9552019-01-21 19:46:48 -0800312 const int col_block_pos = bs->cols[col_block_id].position;
313 const int col_block_size = bs->cols[col_block_id].size;
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800314 // clang-format off
Austin Schuh70cc9552019-01-21 19:46:48 -0800315 MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
Austin Schuh3de38b02024-06-25 18:25:10 -0700316 values + cell.position, row_block_size, col_block_size,
Austin Schuh70cc9552019-01-21 19:46:48 -0800317 x + row_block_pos,
318 y + col_block_pos - num_cols_e_);
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800319 // clang-format on
Austin Schuh70cc9552019-01-21 19:46:48 -0800320 }
321 }
322}
323
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    LeftMultiplyAndAccumulateFMultiThreaded(const double* x, double* y) const {
  // y += F' * x, parallelized over the column blocks of F via the
  // transposed block structure; y is indexed in F's column space
  // (shifted left by num_cols_e). Each worker owns one output range, so
  // no synchronization is needed.
  auto transpose_bs = matrix_.transpose_block_structure();
  CHECK(transpose_bs != nullptr);
  // Local copies of class members in order to avoid capturing pointer to the
  // whole object in lambda function
  auto values = matrix_.values();
  const int num_row_blocks_e = num_row_blocks_e_;
  const int num_cols_e = num_cols_e_;
  ParallelFor(
      options_.context,
      num_col_blocks_e_,
      num_col_blocks_e_ + num_col_blocks_f_,
      options_.num_threads,
      [values, transpose_bs, num_row_blocks_e, num_cols_e, x, y](
          int row_block_id) {
        // row_block_id indexes a column block of the original matrix.
        int row_block_pos = transpose_bs->rows[row_block_id].block.position;
        int row_block_size = transpose_bs->rows[row_block_id].block.size;
        auto& cells = transpose_bs->rows[row_block_id].cells;

        const int num_cells = cells.size();
        int cell_idx = 0;
        // First phase: cells whose row block also contains an e_block
        // (block_id < num_row_blocks_e); these use the compile-time F
        // block sizes.
        for (; cell_idx < num_cells; ++cell_idx) {
          auto& cell = cells[cell_idx];
          const int col_block_id = cell.block_id;
          const int col_block_size = transpose_bs->cols[col_block_id].size;
          const int col_block_pos = transpose_bs->cols[col_block_id].position;
          if (col_block_id >= num_row_blocks_e) break;

          MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
              values + cell.position,
              col_block_size,
              row_block_size,
              x + col_block_pos,
              y + row_block_pos - num_cols_e);
        }
        // Second phase: the remaining cells live in row blocks without
        // e_blocks; their sizes are only known at run time.
        for (; cell_idx < num_cells; ++cell_idx) {
          auto& cell = cells[cell_idx];
          const int col_block_id = cell.block_id;
          const int col_block_size = transpose_bs->cols[col_block_id].size;
          const int col_block_pos = transpose_bs->cols[col_block_id].position;
          MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
              values + cell.position,
              col_block_size,
              row_block_size,
              x + col_block_pos,
              y + row_block_pos - num_cols_e);
        }
      },
      f_cols_partition());
}
376
Austin Schuh70cc9552019-01-21 19:46:48 -0800377// Given a range of columns blocks of a matrix m, compute the block
378// structure of the block diagonal of the matrix m(:,
379// start_col_block:end_col_block)'m(:, start_col_block:end_col_block)
Austin Schuh3de38b02024-06-25 18:25:10 -0700380// and return a BlockSparseMatrix with this block structure. The
Austin Schuh70cc9552019-01-21 19:46:48 -0800381// caller owns the result.
382template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
Austin Schuh3de38b02024-06-25 18:25:10 -0700383std::unique_ptr<BlockSparseMatrix>
Austin Schuh70cc9552019-01-21 19:46:48 -0800384PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800385 CreateBlockDiagonalMatrixLayout(int start_col_block,
386 int end_col_block) const {
Austin Schuh70cc9552019-01-21 19:46:48 -0800387 const CompressedRowBlockStructure* bs = matrix_.block_structure();
Austin Schuh3de38b02024-06-25 18:25:10 -0700388 auto* block_diagonal_structure = new CompressedRowBlockStructure;
Austin Schuh70cc9552019-01-21 19:46:48 -0800389
390 int block_position = 0;
391 int diagonal_cell_position = 0;
392
393 // Iterate over the column blocks, creating a new diagonal block for
394 // each column block.
395 for (int c = start_col_block; c < end_col_block; ++c) {
396 const Block& block = bs->cols[c];
Austin Schuh3de38b02024-06-25 18:25:10 -0700397 block_diagonal_structure->cols.emplace_back();
Austin Schuh70cc9552019-01-21 19:46:48 -0800398 Block& diagonal_block = block_diagonal_structure->cols.back();
399 diagonal_block.size = block.size;
400 diagonal_block.position = block_position;
401
Austin Schuh3de38b02024-06-25 18:25:10 -0700402 block_diagonal_structure->rows.emplace_back();
Austin Schuh70cc9552019-01-21 19:46:48 -0800403 CompressedRow& row = block_diagonal_structure->rows.back();
404 row.block = diagonal_block;
405
Austin Schuh3de38b02024-06-25 18:25:10 -0700406 row.cells.emplace_back();
Austin Schuh70cc9552019-01-21 19:46:48 -0800407 Cell& cell = row.cells.back();
408 cell.block_id = c - start_col_block;
409 cell.position = diagonal_cell_position;
410
411 block_position += block.size;
412 diagonal_cell_position += block.size * block.size;
413 }
414
415 // Build a BlockSparseMatrix with the just computed block
416 // structure.
Austin Schuh3de38b02024-06-25 18:25:10 -0700417 return std::make_unique<BlockSparseMatrix>(block_diagonal_structure);
Austin Schuh70cc9552019-01-21 19:46:48 -0800418}
419
420template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
Austin Schuh3de38b02024-06-25 18:25:10 -0700421std::unique_ptr<BlockSparseMatrix>
422PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
423 CreateBlockDiagonalEtE() const {
424 std::unique_ptr<BlockSparseMatrix> block_diagonal =
Austin Schuh70cc9552019-01-21 19:46:48 -0800425 CreateBlockDiagonalMatrixLayout(0, num_col_blocks_e_);
Austin Schuh3de38b02024-06-25 18:25:10 -0700426 UpdateBlockDiagonalEtE(block_diagonal.get());
Austin Schuh70cc9552019-01-21 19:46:48 -0800427 return block_diagonal;
428}
429
430template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
Austin Schuh3de38b02024-06-25 18:25:10 -0700431std::unique_ptr<BlockSparseMatrix>
432PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
433 CreateBlockDiagonalFtF() const {
434 std::unique_ptr<BlockSparseMatrix> block_diagonal =
435 CreateBlockDiagonalMatrixLayout(num_col_blocks_e_,
436 num_col_blocks_e_ + num_col_blocks_f_);
437 UpdateBlockDiagonalFtF(block_diagonal.get());
Austin Schuh70cc9552019-01-21 19:46:48 -0800438 return block_diagonal;
439}
440
Austin Schuh3de38b02024-06-25 18:25:10 -0700441// Similar to the code in RightMultiplyAndAccumulateE, except instead of the
442// matrix vector multiply its an outer product.
Austin Schuh70cc9552019-01-21 19:46:48 -0800443//
444// block_diagonal = block_diagonal(E'E)
445//
446template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800447void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
Austin Schuh3de38b02024-06-25 18:25:10 -0700448 UpdateBlockDiagonalEtESingleThreaded(
449 BlockSparseMatrix* block_diagonal) const {
450 auto bs = matrix_.block_structure();
451 auto block_diagonal_structure = block_diagonal->block_structure();
Austin Schuh70cc9552019-01-21 19:46:48 -0800452
453 block_diagonal->SetZero();
454 const double* values = matrix_.values();
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800455 for (int r = 0; r < num_row_blocks_e_; ++r) {
Austin Schuh70cc9552019-01-21 19:46:48 -0800456 const Cell& cell = bs->rows[r].cells[0];
457 const int row_block_size = bs->rows[r].block.size;
458 const int block_id = cell.block_id;
459 const int col_block_size = bs->cols[block_id].size;
460 const int cell_position =
461 block_diagonal_structure->rows[block_id].cells[0].position;
462
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800463 // clang-format off
Austin Schuh70cc9552019-01-21 19:46:48 -0800464 MatrixTransposeMatrixMultiply
465 <kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
466 values + cell.position, row_block_size, col_block_size,
467 values + cell.position, row_block_size, col_block_size,
468 block_diagonal->mutable_values() + cell_position,
469 0, 0, col_block_size, col_block_size);
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800470 // clang-format on
Austin Schuh70cc9552019-01-21 19:46:48 -0800471 }
472}
473
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    UpdateBlockDiagonalEtEMultiThreaded(
        BlockSparseMatrix* block_diagonal) const {
  // block_diagonal = block_diagonal(E'E), parallelized over the column
  // blocks of E; each worker writes exactly one diagonal block, so no
  // synchronization is needed.
  auto transpose_block_structure = matrix_.transpose_block_structure();
  CHECK(transpose_block_structure != nullptr);
  auto block_diagonal_structure = block_diagonal->block_structure();

  const double* values = matrix_.values();
  double* values_diagonal = block_diagonal->mutable_values();
  ParallelFor(
      options_.context,
      0,
      num_col_blocks_e_,
      options_.num_threads,
      [values,
       transpose_block_structure,
       values_diagonal,
       block_diagonal_structure](int col_block_id) {
        // Locate this column block's diagonal cell in the output matrix.
        int cell_position =
            block_diagonal_structure->rows[col_block_id].cells[0].position;
        double* cell_values = values_diagonal + cell_position;
        int col_block_size =
            transpose_block_structure->rows[col_block_id].block.size;
        auto& cells = transpose_block_structure->rows[col_block_id].cells;
        // Zero only this worker's block; there is no global SetZero in
        // the multi-threaded path.
        MatrixRef(cell_values, col_block_size, col_block_size).setZero();

        // Accumulate cell' * cell over every cell in this column block.
        for (auto& c : cells) {
          int row_block_size = transpose_block_structure->cols[c.block_id].size;
          // clang-format off
          MatrixTransposeMatrixMultiply<kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
              values + c.position, row_block_size, col_block_size,
              values + c.position, row_block_size, col_block_size,
              cell_values, 0, 0, col_block_size, col_block_size);
          // clang-format on
        }
      },
      e_cols_partition_);
}
513
514template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
515void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
516 UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const {
517 if (options_.num_threads == 1) {
518 UpdateBlockDiagonalEtESingleThreaded(block_diagonal);
519 } else {
520 CHECK(options_.context != nullptr);
521 UpdateBlockDiagonalEtEMultiThreaded(block_diagonal);
522 }
523}
524
// Similar to the code in RightMultiplyAndAccumulateF, except instead of the
// matrix vector multiply its an outer product.
//
// block_diagonal = block_diagonal(F'F)
//
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    UpdateBlockDiagonalFtFSingleThreaded(
        BlockSparseMatrix* block_diagonal) const {
  auto bs = matrix_.block_structure();
  auto block_diagonal_structure = block_diagonal->block_structure();

  block_diagonal->SetZero();
  const double* values = matrix_.values();
  // Row blocks with an e_block: skip cell 0 (it belongs to E); the
  // remaining cells are f_blocks with compile-time block sizes.
  for (int r = 0; r < num_row_blocks_e_; ++r) {
    const int row_block_size = bs->rows[r].block.size;
    const std::vector<Cell>& cells = bs->rows[r].cells;
    for (int c = 1; c < cells.size(); ++c) {
      const int col_block_id = cells[c].block_id;
      const int col_block_size = bs->cols[col_block_id].size;
      // Diagonal blocks are indexed relative to the start of F.
      const int diagonal_block_id = col_block_id - num_col_blocks_e_;
      const int cell_position =
          block_diagonal_structure->rows[diagonal_block_id].cells[0].position;

      // clang-format off
      MatrixTransposeMatrixMultiply
          <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
          values + cells[c].position, row_block_size, col_block_size,
          values + cells[c].position, row_block_size, col_block_size,
          block_diagonal->mutable_values() + cell_position,
          0, 0, col_block_size, col_block_size);
      // clang-format on
    }
  }

  // Row blocks without e_blocks: every cell is an f_block whose size is
  // only known at run time, hence Eigen::Dynamic.
  for (int r = num_row_blocks_e_; r < bs->rows.size(); ++r) {
    const int row_block_size = bs->rows[r].block.size;
    const std::vector<Cell>& cells = bs->rows[r].cells;
    for (const auto& cell : cells) {
      const int col_block_id = cell.block_id;
      const int col_block_size = bs->cols[col_block_id].size;
      const int diagonal_block_id = col_block_id - num_col_blocks_e_;
      const int cell_position =
          block_diagonal_structure->rows[diagonal_block_id].cells[0].position;

      // clang-format off
      MatrixTransposeMatrixMultiply
          <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
          values + cell.position, row_block_size, col_block_size,
          values + cell.position, row_block_size, col_block_size,
          block_diagonal->mutable_values() + cell_position,
          0, 0, col_block_size, col_block_size);
      // clang-format on
    }
  }
}
581
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    UpdateBlockDiagonalFtFMultiThreaded(
        BlockSparseMatrix* block_diagonal) const {
  // block_diagonal = block_diagonal(F'F), parallelized over the column
  // blocks of F; each worker writes exactly one diagonal block, so no
  // synchronization is needed.
  auto transpose_block_structure = matrix_.transpose_block_structure();
  CHECK(transpose_block_structure != nullptr);
  auto block_diagonal_structure = block_diagonal->block_structure();

  const double* values = matrix_.values();
  double* values_diagonal = block_diagonal->mutable_values();

  // Local copies of class members in order to avoid capturing pointer to
  // the whole object in the lambda function.
  const int num_col_blocks_e = num_col_blocks_e_;
  const int num_row_blocks_e = num_row_blocks_e_;
  ParallelFor(
      options_.context,
      num_col_blocks_e_,
      num_col_blocks_e + num_col_blocks_f_,
      options_.num_threads,
      [transpose_block_structure,
       block_diagonal_structure,
       num_col_blocks_e,
       num_row_blocks_e,
       values,
       values_diagonal](int col_block_id) {
        const int col_block_size =
            transpose_block_structure->rows[col_block_id].block.size;
        // Diagonal blocks are indexed relative to the start of F.
        const int diagonal_block_id = col_block_id - num_col_blocks_e;
        const int cell_position =
            block_diagonal_structure->rows[diagonal_block_id].cells[0].position;
        double* cell_values = values_diagonal + cell_position;

        // Zero only this worker's block; there is no global SetZero in
        // the multi-threaded path.
        MatrixRef(cell_values, col_block_size, col_block_size).setZero();

        auto& cells = transpose_block_structure->rows[col_block_id].cells;
        const int num_cells = cells.size();
        int i = 0;
        // First phase: cells whose row block also contains an e_block
        // (block_id < num_row_blocks_e); compile-time block sizes.
        for (; i < num_cells; ++i) {
          auto& cell = cells[i];
          const int row_block_id = cell.block_id;
          if (row_block_id >= num_row_blocks_e) break;
          const int row_block_size =
              transpose_block_structure->cols[row_block_id].size;
          // clang-format off
          MatrixTransposeMatrixMultiply
              <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
              values + cell.position, row_block_size, col_block_size,
              values + cell.position, row_block_size, col_block_size,
              cell_values, 0, 0, col_block_size, col_block_size);
          // clang-format on
        }
        // Second phase: remaining cells live in row blocks without
        // e_blocks; their sizes are only known at run time.
        for (; i < num_cells; ++i) {
          auto& cell = cells[i];
          const int row_block_id = cell.block_id;
          const int row_block_size =
              transpose_block_structure->cols[row_block_id].size;
          // clang-format off
          MatrixTransposeMatrixMultiply
              <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
              values + cell.position, row_block_size, col_block_size,
              values + cell.position, row_block_size, col_block_size,
              cell_values, 0, 0, col_block_size, col_block_size);
          // clang-format on
        }
      },
      f_cols_partition_);
}
648
649template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
650void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
651 UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const {
652 if (options_.num_threads == 1) {
653 UpdateBlockDiagonalFtFSingleThreaded(block_diagonal);
654 } else {
655 CHECK(options_.context != nullptr);
656 UpdateBlockDiagonalFtFMultiThreaded(block_diagonal);
657 }
658}
659
660} // namespace ceres::internal