// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)

#include "ceres/block_sparse_matrix.h"

#include <algorithm>
#include <cstddef>
#include <memory>
#include <numeric>
#include <random>
#include <vector>

#include "ceres/block_structure.h"
#include "ceres/crs_matrix.h"
#include "ceres/internal/eigen.h"
#include "ceres/parallel_for.h"
#include "ceres/parallel_vector_ops.h"
#include "ceres/small_blas.h"
#include "ceres/triplet_sparse_matrix.h"
#include "glog/logging.h"

#ifndef CERES_NO_CUDA
#include "cuda_runtime.h"
#endif

namespace ceres::internal {

namespace {
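// Computes the prefix sum of per-row-block non-zero counts. The cumulative
// counts are used below as per-iteration cost estimates when partitioning
// guided ParallelFor loops over the transposed block structure.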
void ComputeCumulativeNumberOfNonZeros(std::vector<CompressedList>& rows) {
  if (rows.empty()) {
    return;
  }
  rows[0].cumulative_nnz = rows[0].nnz;
  for (int c = 1; c < rows.size(); ++c) {
    const int curr_nnz = rows[c].nnz;
    rows[c].cumulative_nnz = curr_nnz + rows[c - 1].cumulative_nnz;
  }
}

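// Builds the row and column index arrays (but not the values) of a CRS matrix
// with the same scalar non-zero layout as the given block structure. When
// transpose is true, the block structure is assumed to be a transposed one,
// with the nnz field of every row block already filled in.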
template <bool transpose>
std::unique_ptr<CompressedRowSparseMatrix>
CreateStructureOfCompressedRowSparseMatrix(
    int num_rows,
    int num_cols,
    int num_nonzeros,
    const CompressedRowBlockStructure* block_structure) {
  auto crs_matrix = std::make_unique<CompressedRowSparseMatrix>(
      num_rows, num_cols, num_nonzeros);
  auto crs_cols = crs_matrix->mutable_cols();
  auto crs_rows = crs_matrix->mutable_rows();
  int value_offset = 0;
  const int num_row_blocks = block_structure->rows.size();
  const auto& cols = block_structure->cols;
  *crs_rows++ = 0;
  for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) {
    const auto& row_block = block_structure->rows[row_block_id];
    // Empty row block: only requires setting row offsets.
    if (row_block.cells.empty()) {
      std::fill(crs_rows, crs_rows + row_block.block.size, value_offset);
      crs_rows += row_block.block.size;
      continue;
    }

    int row_nnz = 0;
    if constexpr (transpose) {
      // A transposed block structure comes with the nnz of each row block
      // filled in.
      row_nnz = row_block.nnz / row_block.block.size;
    } else {
      // The nnz field of a non-transposed block structure is not filled in,
      // and its cells can be laid out non-sequentially (consider the Jacobian
      // of a Schur-complement solver: the E and F blocks are stored
      // separately).
      for (auto& c : row_block.cells) {
        row_nnz += cols[c.block_id].size;
      }
    }

    // Row-wise setup of the matrix structure.
    for (int row = 0; row < row_block.block.size; ++row) {
      value_offset += row_nnz;
      *crs_rows++ = value_offset;
      for (auto& c : row_block.cells) {
        const int col_block_size = cols[c.block_id].size;
        const int col_position = cols[c.block_id].position;
        std::iota(crs_cols, crs_cols + col_block_size, col_position);
        crs_cols += col_block_size;
      }
    }
  }
  return crs_matrix;
}

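// Copies the values of a block-sparse matrix into a CRS matrix whose
// structure was produced by CreateStructureOfCompressedRowSparseMatrix above.
// When transpose is true, every cell is transposed on the fly as it is
// copied.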
template <bool transpose>
void UpdateCompressedRowSparseMatrixImpl(
    CompressedRowSparseMatrix* crs_matrix,
    const double* values,
    const CompressedRowBlockStructure* block_structure) {
  auto crs_values = crs_matrix->mutable_values();
  auto crs_rows = crs_matrix->mutable_rows();
  const int num_row_blocks = block_structure->rows.size();
  const auto& cols = block_structure->cols;
  for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) {
    const auto& row_block = block_structure->rows[row_block_id];
    const int row_block_size = row_block.block.size;
    const int row_nnz = crs_rows[1] - crs_rows[0];
    crs_rows += row_block_size;

    if (row_nnz == 0) {
      continue;
    }

    MatrixRef crs_row_block(crs_values, row_block_size, row_nnz);
    int col_offset = 0;
    for (auto& c : row_block.cells) {
      const int col_block_size = cols[c.block_id].size;
      auto crs_cell =
          crs_row_block.block(0, col_offset, row_block_size, col_block_size);
      if constexpr (transpose) {
        // The transposed matrix is filled using the transposed block
        // structure.
        ConstMatrixRef cell(
            values + c.position, col_block_size, row_block_size);
        crs_cell = cell.transpose();
      } else {
        ConstMatrixRef cell(
            values + c.position, row_block_size, col_block_size);
        crs_cell = cell;
      }
      col_offset += col_block_size;
    }
    crs_values += row_nnz * row_block_size;
  }
}

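// Copies the row- and column-block partitioning of the block structure into
// the CRS matrix, so that consumers of the CRS matrix can recover the block
// layout.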
void SetBlockStructureOfCompressedRowSparseMatrix(
    CompressedRowSparseMatrix* crs_matrix,
    CompressedRowBlockStructure* block_structure) {
  const int num_row_blocks = block_structure->rows.size();
  auto& row_blocks = *crs_matrix->mutable_row_blocks();
  row_blocks.resize(num_row_blocks);
  for (int i = 0; i < num_row_blocks; ++i) {
    row_blocks[i] = block_structure->rows[i].block;
  }

  auto& col_blocks = *crs_matrix->mutable_col_blocks();
  col_blocks = block_structure->cols;
}

}  // namespace

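// The matrix takes ownership of block_structure and of its values array. If
// use_page_locked_memory is true, the values are allocated as page-locked
// (pinned) host memory (see AllocateValues below) so that CUDA transfers do
// not have to stage through pageable memory.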
BlockSparseMatrix::BlockSparseMatrix(
    CompressedRowBlockStructure* block_structure, bool use_page_locked_memory)
    : use_page_locked_memory_(use_page_locked_memory),
      num_rows_(0),
      num_cols_(0),
      num_nonzeros_(0),
      block_structure_(block_structure) {
  CHECK(block_structure_ != nullptr);

  // Count the number of columns in the matrix.
  for (auto& col : block_structure_->cols) {
    num_cols_ += col.size;
  }

  // Count the number of non-zero entries and the number of rows in
  // the matrix.
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_size = block_structure_->rows[i].block.size;
    num_rows_ += row_block_size;

    const std::vector<Cell>& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      num_nonzeros_ += col_block_size * row_block_size;
    }
  }

  CHECK_GE(num_rows_, 0);
  CHECK_GE(num_cols_, 0);
  CHECK_GE(num_nonzeros_, 0);
  VLOG(2) << "Allocating values array with " << num_nonzeros_ * sizeof(double)
          << " bytes.";  // NOLINT

  values_ = AllocateValues(num_nonzeros_);
  max_num_nonzeros_ = num_nonzeros_;
  CHECK(values_ != nullptr);
  AddTransposeBlockStructure();
}

BlockSparseMatrix::~BlockSparseMatrix() { FreeValues(values_); }

void BlockSparseMatrix::AddTransposeBlockStructure() {
  if (transpose_block_structure_ == nullptr) {
    transpose_block_structure_ = CreateTranspose(*block_structure_);
  }
}

void BlockSparseMatrix::SetZero() {
  std::fill(values_, values_ + num_nonzeros_, 0.0);
}

void BlockSparseMatrix::SetZero(ContextImpl* context, int num_threads) {
  ParallelSetZero(context, num_threads, values_, num_nonzeros_);
}

void BlockSparseMatrix::RightMultiplyAndAccumulate(const double* x,
                                                   double* y) const {
  RightMultiplyAndAccumulate(x, y, nullptr, 1);
}

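// Row blocks are independent of each other: each scalar row of y is written
// by exactly one row block, so the row blocks can be partitioned across
// threads without any synchronization.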
void BlockSparseMatrix::RightMultiplyAndAccumulate(const double* x,
                                                   double* y,
                                                   ContextImpl* context,
                                                   int num_threads) const {
  CHECK(x != nullptr);
  CHECK(y != nullptr);

  const auto values = values_;
  const auto block_structure = block_structure_.get();
  const auto num_row_blocks = block_structure->rows.size();

  ParallelFor(context,
              0,
              num_row_blocks,
              num_threads,
              [values, block_structure, x, y](int row_block_id) {
                const int row_block_pos =
                    block_structure->rows[row_block_id].block.position;
                const int row_block_size =
                    block_structure->rows[row_block_id].block.size;
                const auto& cells = block_structure->rows[row_block_id].cells;
                for (const auto& cell : cells) {
                  const int col_block_id = cell.block_id;
                  const int col_block_size =
                      block_structure->cols[col_block_id].size;
                  const int col_block_pos =
                      block_structure->cols[col_block_id].position;
                  MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
                      values + cell.position,
                      row_block_size,
                      col_block_size,
                      x + col_block_pos,
                      y + row_block_pos);
                }
              });
}

// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
// might benefit from caching the column-block partition.
void BlockSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
                                                  double* y,
                                                  ContextImpl* context,
                                                  int num_threads) const {
  // While the transposed structure makes it possible to compute the
  // left-multiplication by a dense vector in parallel, it scatters the access
  // pattern to the matrix elements. Thus, multiplication via the transposed
  // structure is only worthwhile when executing in parallel.
  CHECK(x != nullptr);
  CHECK(y != nullptr);
  if (transpose_block_structure_ == nullptr || num_threads == 1) {
    LeftMultiplyAndAccumulate(x, y);
    return;
  }

  auto transpose_bs = transpose_block_structure_.get();
  const auto values = values_;
  const int num_col_blocks = transpose_bs->rows.size();
  if (!num_col_blocks) {
    return;
  }

  // Use the non-zero count as the iteration cost for the guided parallel-for
  // loop.
  ParallelFor(
      context,
      0,
      num_col_blocks,
      num_threads,
      [values, transpose_bs, x, y](int row_block_id) {
        int row_block_pos = transpose_bs->rows[row_block_id].block.position;
        int row_block_size = transpose_bs->rows[row_block_id].block.size;
        auto& cells = transpose_bs->rows[row_block_id].cells;

        for (auto& cell : cells) {
          const int col_block_id = cell.block_id;
          const int col_block_size = transpose_bs->cols[col_block_id].size;
          const int col_block_pos = transpose_bs->cols[col_block_id].position;
          MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
              values + cell.position,
              col_block_size,
              row_block_size,
              x + col_block_pos,
              y + row_block_pos);
        }
      },
      transpose_bs->rows.data(),
      [](const CompressedRow& row) { return row.cumulative_nnz; });
}

void BlockSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
                                                  double* y) const {
  CHECK(x != nullptr);
  CHECK(y != nullptr);
  // Single-threaded left products are always computed using the
  // non-transposed block structure, because it has a linear access pattern to
  // the matrix elements.
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_pos = block_structure_->rows[i].block.position;
    int row_block_size = block_structure_->rows[i].block.size;
    const auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
          values_ + cell.position,
          row_block_size,
          col_block_size,
          x + row_block_pos,
          y + col_block_pos);
    }
  }
}

void BlockSparseMatrix::SquaredColumnNorm(double* x) const {
  CHECK(x != nullptr);
  VectorRef(x, num_cols_).setZero();
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_size = block_structure_->rows[i].block.size;
    auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      const MatrixRef m(
          values_ + cell.position, row_block_size, col_block_size);
      VectorRef(x + col_block_pos, col_block_size) += m.colwise().squaredNorm();
    }
  }
}

// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
// might benefit from caching the column-block partition.
void BlockSparseMatrix::SquaredColumnNorm(double* x,
                                          ContextImpl* context,
                                          int num_threads) const {
  if (transpose_block_structure_ == nullptr || num_threads == 1) {
    SquaredColumnNorm(x);
    return;
  }

  CHECK(x != nullptr);
  ParallelSetZero(context, num_threads, x, num_cols_);

  auto transpose_bs = transpose_block_structure_.get();
  const auto values = values_;
  const int num_col_blocks = transpose_bs->rows.size();
  ParallelFor(
      context,
      0,
      num_col_blocks,
      num_threads,
      [values, transpose_bs, x](int row_block_id) {
        const auto& row = transpose_bs->rows[row_block_id];

        for (auto& cell : row.cells) {
          const auto& col = transpose_bs->cols[cell.block_id];
          const MatrixRef m(values + cell.position, col.size, row.block.size);
          VectorRef(x + row.block.position, row.block.size) +=
              m.colwise().squaredNorm();
        }
      },
      transpose_bs->rows.data(),
      [](const CompressedRow& row) { return row.cumulative_nnz; });
}

void BlockSparseMatrix::ScaleColumns(const double* scale) {
  CHECK(scale != nullptr);

  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_size = block_structure_->rows[i].block.size;
    auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      MatrixRef m(values_ + cell.position, row_block_size, col_block_size);
      m *= ConstVectorRef(scale + col_block_pos, col_block_size).asDiagonal();
    }
  }
}

// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
// might benefit from caching the column-block partition.
void BlockSparseMatrix::ScaleColumns(const double* scale,
                                     ContextImpl* context,
                                     int num_threads) {
  if (transpose_block_structure_ == nullptr || num_threads == 1) {
    ScaleColumns(scale);
    return;
  }

  CHECK(scale != nullptr);
  auto transpose_bs = transpose_block_structure_.get();
  auto values = values_;
  const int num_col_blocks = transpose_bs->rows.size();
  ParallelFor(
      context,
      0,
      num_col_blocks,
      num_threads,
      [values, transpose_bs, scale](int row_block_id) {
        const auto& row = transpose_bs->rows[row_block_id];

        for (auto& cell : row.cells) {
          const auto& col = transpose_bs->cols[cell.block_id];
          MatrixRef m(values + cell.position, col.size, row.block.size);
          m *= ConstVectorRef(scale + row.block.position, row.block.size)
                   .asDiagonal();
        }
      },
      transpose_bs->rows.data(),
      [](const CompressedRow& row) { return row.cumulative_nnz; });
}

std::unique_ptr<CompressedRowSparseMatrix>
BlockSparseMatrix::ToCompressedRowSparseMatrixTranspose() const {
  auto bs = transpose_block_structure_.get();
  auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<true>(
      num_cols_, num_rows_, num_nonzeros_, bs);

  SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(), bs);

  UpdateCompressedRowSparseMatrixTranspose(crs_matrix.get());
  return crs_matrix;
}

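// A minimal usage sketch (names are illustrative): create the CRS view once,
// then refresh only its values as the block-sparse values change:
//
//   auto crs = A.ToCompressedRowSparseMatrix();    // structure + values
//   ...                                            // A's values change
//   A.UpdateCompressedRowSparseMatrix(crs.get());  // copies values only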
std::unique_ptr<CompressedRowSparseMatrix>
BlockSparseMatrix::ToCompressedRowSparseMatrix() const {
  auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<false>(
      num_rows_, num_cols_, num_nonzeros_, block_structure_.get());

  SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(),
                                               block_structure_.get());

  UpdateCompressedRowSparseMatrix(crs_matrix.get());
  return crs_matrix;
}

void BlockSparseMatrix::UpdateCompressedRowSparseMatrixTranspose(
    CompressedRowSparseMatrix* crs_matrix) const {
  CHECK(crs_matrix != nullptr);
  CHECK_EQ(crs_matrix->num_rows(), num_cols_);
  CHECK_EQ(crs_matrix->num_cols(), num_rows_);
  CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_);
  UpdateCompressedRowSparseMatrixImpl<true>(
      crs_matrix, values(), transpose_block_structure_.get());
}

void BlockSparseMatrix::UpdateCompressedRowSparseMatrix(
    CompressedRowSparseMatrix* crs_matrix) const {
  CHECK(crs_matrix != nullptr);
  CHECK_EQ(crs_matrix->num_rows(), num_rows_);
  CHECK_EQ(crs_matrix->num_cols(), num_cols_);
  CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_);
  UpdateCompressedRowSparseMatrixImpl<false>(
      crs_matrix, values(), block_structure_.get());
}

void BlockSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const {
  CHECK(dense_matrix != nullptr);

  dense_matrix->resize(num_rows_, num_cols_);
  dense_matrix->setZero();
  Matrix& m = *dense_matrix;

  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_pos = block_structure_->rows[i].block.position;
    int row_block_size = block_structure_->rows[i].block.size;
    auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      int jac_pos = cell.position;
      m.block(row_block_pos, col_block_pos, row_block_size, col_block_size) +=
          MatrixRef(values_ + jac_pos, row_block_size, col_block_size);
    }
  }
}

void BlockSparseMatrix::ToTripletSparseMatrix(
    TripletSparseMatrix* matrix) const {
  CHECK(matrix != nullptr);

  matrix->Reserve(num_nonzeros_);
  matrix->Resize(num_rows_, num_cols_);
  matrix->SetZero();

  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_pos = block_structure_->rows[i].block.position;
    int row_block_size = block_structure_->rows[i].block.size;
    const auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      int jac_pos = cell.position;
      for (int r = 0; r < row_block_size; ++r) {
        for (int c = 0; c < col_block_size; ++c, ++jac_pos) {
          matrix->mutable_rows()[jac_pos] = row_block_pos + r;
          matrix->mutable_cols()[jac_pos] = col_block_pos + c;
          matrix->mutable_values()[jac_pos] = values_[jac_pos];
        }
      }
    }
  }
  matrix->set_num_nonzeros(num_nonzeros_);
}

// Return a pointer to the block structure. We continue to hold
// ownership of the object though.
const CompressedRowBlockStructure* BlockSparseMatrix::block_structure() const {
  return block_structure_.get();
}

// Return a pointer to the block structure of the matrix transpose. We
// continue to hold ownership of the object though.
const CompressedRowBlockStructure*
BlockSparseMatrix::transpose_block_structure() const {
  return transpose_block_structure_.get();
}

void BlockSparseMatrix::ToTextFile(FILE* file) const {
  CHECK(file != nullptr);
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    const int row_block_pos = block_structure_->rows[i].block.position;
    const int row_block_size = block_structure_->rows[i].block.size;
    const auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      const int col_block_id = cell.block_id;
      const int col_block_size = block_structure_->cols[col_block_id].size;
      const int col_block_pos = block_structure_->cols[col_block_id].position;
      int jac_pos = cell.position;
      for (int r = 0; r < row_block_size; ++r) {
        for (int c = 0; c < col_block_size; ++c) {
          fprintf(file,
                  "% 10d % 10d %17f\n",
                  row_block_pos + r,
                  col_block_pos + c,
                  values_[jac_pos++]);
        }
      }
    }
  }
}

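// A minimal usage sketch (values are illustrative): build a 5x5 block
// diagonal matrix D from a dense diagonal with blocks of size 2 and 3:
//
//   std::vector<Block> blocks = {Block(2, 0), Block(3, 2)};
//   Vector diagonal(5);
//   diagonal.setOnes();
//   auto D = BlockSparseMatrix::CreateDiagonalMatrix(diagonal.data(), blocks);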
std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateDiagonalMatrix(
    const double* diagonal, const std::vector<Block>& column_blocks) {
  // Create the block structure for the diagonal matrix.
  auto* bs = new CompressedRowBlockStructure();
  bs->cols = column_blocks;
  int position = 0;
  bs->rows.resize(column_blocks.size(), CompressedRow(1));
  for (int i = 0; i < column_blocks.size(); ++i) {
    CompressedRow& row = bs->rows[i];
    row.block = column_blocks[i];
    Cell& cell = row.cells[0];
    cell.block_id = i;
    cell.position = position;
    position += row.block.size * row.block.size;
  }

  // Create the BlockSparseMatrix with the given block structure.
  auto matrix = std::make_unique<BlockSparseMatrix>(bs);
  matrix->SetZero();

  // Fill the values array of the block sparse matrix.
  double* values = matrix->mutable_values();
  for (const auto& column_block : column_blocks) {
    const int size = column_block.size;
    for (int j = 0; j < size; ++j) {
      // j * (size + 1) is a compact way of accessing the (j, j) entry of a
      // row-major size x size block.
      values[j * (size + 1)] = diagonal[j];
    }
    diagonal += size;
    values += size * size;
  }

  return matrix;
}

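// Appends the rows of m below the existing rows of this matrix. If a
// transpose block structure is present it is updated incrementally: every
// appended cell is also registered in its column's transpose row, and the
// cumulative non-zero counts are recomputed at the end.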
void BlockSparseMatrix::AppendRows(const BlockSparseMatrix& m) {
  CHECK_EQ(m.num_cols(), num_cols());
  const CompressedRowBlockStructure* m_bs = m.block_structure();
  CHECK_EQ(m_bs->cols.size(), block_structure_->cols.size());

  const int old_num_nonzeros = num_nonzeros_;
  const int old_num_row_blocks = block_structure_->rows.size();
  block_structure_->rows.resize(old_num_row_blocks + m_bs->rows.size());

  for (int i = 0; i < m_bs->rows.size(); ++i) {
    const CompressedRow& m_row = m_bs->rows[i];
    const int row_block_id = old_num_row_blocks + i;
    CompressedRow& row = block_structure_->rows[row_block_id];
    row.block.size = m_row.block.size;
    row.block.position = num_rows_;
    num_rows_ += m_row.block.size;
    row.cells.resize(m_row.cells.size());
    if (transpose_block_structure_) {
      transpose_block_structure_->cols.emplace_back(row.block);
    }
    for (int c = 0; c < m_row.cells.size(); ++c) {
      const int block_id = m_row.cells[c].block_id;
      row.cells[c].block_id = block_id;
      row.cells[c].position = num_nonzeros_;

      const int cell_nnz = m_row.block.size * m_bs->cols[block_id].size;
      if (transpose_block_structure_) {
        transpose_block_structure_->rows[block_id].cells.emplace_back(
            row_block_id, num_nonzeros_);
        transpose_block_structure_->rows[block_id].nnz += cell_nnz;
      }

      num_nonzeros_ += cell_nnz;
    }
  }

  if (num_nonzeros_ > max_num_nonzeros_) {
    double* old_values = values_;
    values_ = AllocateValues(num_nonzeros_);
    std::copy_n(old_values, old_num_nonzeros, values_);
    max_num_nonzeros_ = num_nonzeros_;
    FreeValues(old_values);
  }

  std::copy(
      m.values(), m.values() + m.num_nonzeros(), values_ + old_num_nonzeros);

  if (transpose_block_structure_ == nullptr) {
    return;
  }
  ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows);
}

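// Removes the trailing delta_row_blocks row blocks; the inverse of
// AppendRows. Transpose cells are popped from the back of each affected
// column, which relies on cells within a transpose row being ordered by
// increasing row-block id.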
void BlockSparseMatrix::DeleteRowBlocks(const int delta_row_blocks) {
  const int num_row_blocks = block_structure_->rows.size();
  const int new_num_row_blocks = num_row_blocks - delta_row_blocks;
  int delta_num_nonzeros = 0;
  int delta_num_rows = 0;
  const std::vector<Block>& column_blocks = block_structure_->cols;
  for (int i = 0; i < delta_row_blocks; ++i) {
    const CompressedRow& row = block_structure_->rows[num_row_blocks - i - 1];
    delta_num_rows += row.block.size;
    for (int c = 0; c < row.cells.size(); ++c) {
      const Cell& cell = row.cells[c];
      delta_num_nonzeros += row.block.size * column_blocks[cell.block_id].size;

      if (transpose_block_structure_) {
        auto& col_cells = transpose_block_structure_->rows[cell.block_id].cells;
        while (!col_cells.empty() &&
               col_cells.back().block_id >= new_num_row_blocks) {
          const int del_block_id = col_cells.back().block_id;
          const int del_block_rows =
              block_structure_->rows[del_block_id].block.size;
          const int del_block_cols = column_blocks[cell.block_id].size;
          const int del_cell_nnz = del_block_rows * del_block_cols;
          transpose_block_structure_->rows[cell.block_id].nnz -= del_cell_nnz;
          col_cells.pop_back();
        }
      }
    }
  }
  num_nonzeros_ -= delta_num_nonzeros;
  num_rows_ -= delta_num_rows;
  block_structure_->rows.resize(new_num_row_blocks);

  if (transpose_block_structure_ == nullptr) {
    return;
  }
  for (int i = 0; i < delta_row_blocks; ++i) {
    transpose_block_structure_->cols.pop_back();
  }

  ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows);
}

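// Creates a random matrix, typically used for testing and benchmarking.
// Block sizes are drawn uniformly from the configured ranges, each cell is
// present with probability block_density, and the values are i.i.d. standard
// normal. The outer loop retries until at least one cell has been generated.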
std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateRandomMatrix(
    const BlockSparseMatrix::RandomMatrixOptions& options,
    std::mt19937& prng,
    bool use_page_locked_memory) {
  CHECK_GT(options.num_row_blocks, 0);
  CHECK_GT(options.min_row_block_size, 0);
  CHECK_GT(options.max_row_block_size, 0);
  CHECK_LE(options.min_row_block_size, options.max_row_block_size);
  CHECK_GT(options.block_density, 0.0);
  CHECK_LE(options.block_density, 1.0);

  std::uniform_int_distribution<int> col_distribution(
      options.min_col_block_size, options.max_col_block_size);
  std::uniform_int_distribution<int> row_distribution(
      options.min_row_block_size, options.max_row_block_size);
  auto bs = std::make_unique<CompressedRowBlockStructure>();
  if (options.col_blocks.empty()) {
    CHECK_GT(options.num_col_blocks, 0);
    CHECK_GT(options.min_col_block_size, 0);
    CHECK_GT(options.max_col_block_size, 0);
    CHECK_LE(options.min_col_block_size, options.max_col_block_size);

    // Generate the col block structure.
    int col_block_position = 0;
    for (int i = 0; i < options.num_col_blocks; ++i) {
      const int col_block_size = col_distribution(prng);
      bs->cols.emplace_back(col_block_size, col_block_position);
      col_block_position += col_block_size;
    }
  } else {
    bs->cols = options.col_blocks;
  }

  bool matrix_has_blocks = false;
  std::uniform_real_distribution<double> uniform01(0.0, 1.0);
  while (!matrix_has_blocks) {
    VLOG(1) << "Clearing";
    bs->rows.clear();
    int row_block_position = 0;
    int value_position = 0;
    for (int r = 0; r < options.num_row_blocks; ++r) {
      const int row_block_size = row_distribution(prng);
      bs->rows.emplace_back();
      CompressedRow& row = bs->rows.back();
      row.block.size = row_block_size;
      row.block.position = row_block_position;
      row_block_position += row_block_size;
      for (int c = 0; c < bs->cols.size(); ++c) {
        if (uniform01(prng) > options.block_density) continue;

        row.cells.emplace_back();
        Cell& cell = row.cells.back();
        cell.block_id = c;
        cell.position = value_position;
        value_position += row_block_size * bs->cols[c].size;
        matrix_has_blocks = true;
      }
    }
  }

  auto matrix = std::make_unique<BlockSparseMatrix>(bs.release(),
                                                    use_page_locked_memory);
  double* values = matrix->mutable_values();
  std::normal_distribution<double> standard_normal_distribution;
  std::generate_n(
      values, matrix->num_nonzeros(), [&standard_normal_distribution, &prng] {
        return standard_normal_distribution(prng);
      });

  return matrix;
}

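// Builds the block structure of the transpose: row blocks become column
// blocks and vice versa. Each transpose cell keeps the value offset of the
// original cell, since the values array is shared with the original matrix
// and cells are transposed on access.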
std::unique_ptr<CompressedRowBlockStructure> CreateTranspose(
    const CompressedRowBlockStructure& bs) {
  auto transpose = std::make_unique<CompressedRowBlockStructure>();

  transpose->rows.resize(bs.cols.size());
  for (int i = 0; i < bs.cols.size(); ++i) {
    transpose->rows[i].block = bs.cols[i];
    transpose->rows[i].nnz = 0;
  }

  transpose->cols.resize(bs.rows.size());
  for (int i = 0; i < bs.rows.size(); ++i) {
    auto& row = bs.rows[i];
    transpose->cols[i] = row.block;

    const int nrows = row.block.size;
    for (auto& cell : row.cells) {
      transpose->rows[cell.block_id].cells.emplace_back(i, cell.position);
      const int ncols = transpose->rows[cell.block_id].block.size;
      transpose->rows[cell.block_id].nnz += nrows * ncols;
    }
  }
  ComputeCumulativeNumberOfNonZeros(transpose->rows);
  return transpose;
}

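// Allocates the values array either on the regular heap or, when page-locked
// memory was requested (and CUDA is available), as pinned host memory via
// cudaHostAlloc, which allows faster host-to-device transfers.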
double* BlockSparseMatrix::AllocateValues(int size) {
  if (!use_page_locked_memory_) {
    return new double[size];
  }

#ifndef CERES_NO_CUDA

  double* values = nullptr;
  CHECK_EQ(cudaSuccess,
           cudaHostAlloc(&values, sizeof(double) * size, cudaHostAllocDefault));
  return values;
#else
  LOG(FATAL) << "Page locked memory requested when CUDA is not available. "
             << "This is a Ceres bug; please contact the developers!";
  return nullptr;
#endif
}

void BlockSparseMatrix::FreeValues(double*& values) {
  if (!use_page_locked_memory_) {
    delete[] values;
    values = nullptr;
    return;
  }

#ifndef CERES_NO_CUDA
  CHECK_EQ(cudaSuccess, cudaFreeHost(values));
  values = nullptr;
#else
  LOG(FATAL) << "Page locked memory requested when CUDA is not available. "
             << "This is a Ceres bug; please contact the developers!";
#endif
}

}  // namespace ceres::internal