// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)

#include "ceres/block_sparse_matrix.h"

#include <algorithm>
#include <cstddef>
#include <memory>
#include <numeric>
#include <random>
#include <vector>

#include "ceres/block_structure.h"
#include "ceres/crs_matrix.h"
#include "ceres/internal/eigen.h"
#include "ceres/parallel_for.h"
#include "ceres/parallel_vector_ops.h"
#include "ceres/small_blas.h"
#include "ceres/triplet_sparse_matrix.h"
#include "glog/logging.h"

#ifndef CERES_NO_CUDA
#include "cuda_runtime.h"
#endif

namespace ceres::internal {

namespace {
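// Computes a running (prefix) sum of the per-row-block non-zero counts.
// Illustrative example: for row blocks with nnz = {4, 6, 2}, the resulting
// cumulative_nnz values are {4, 10, 12}. The cumulative counts are used as
// iteration costs when partitioning the guided parallel-for loops below.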
void ComputeCumulativeNumberOfNonZeros(std::vector<CompressedList>& rows) {
  if (rows.empty()) {
    return;
  }
  rows[0].cumulative_nnz = rows[0].nnz;
  for (int c = 1; c < rows.size(); ++c) {
    const int curr_nnz = rows[c].nnz;
    rows[c].cumulative_nnz = curr_nnz + rows[c - 1].cumulative_nnz;
  }
}

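// Builds a CompressedRowSparseMatrix with the sparsity pattern implied by the
// block structure; the values array is filled in separately by
// UpdateCompressedRowSparseMatrixImpl. Illustrative example: a single row
// block of height 2 containing one cell of a column block of width 3 that
// starts at column 4 yields rows = {0, 3, 6} and cols = {4, 5, 6, 4, 5, 6}.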
template <bool transpose>
std::unique_ptr<CompressedRowSparseMatrix>
CreateStructureOfCompressedRowSparseMatrix(
    int num_rows,
    int num_cols,
    int num_nonzeros,
    const CompressedRowBlockStructure* block_structure) {
  auto crs_matrix = std::make_unique<CompressedRowSparseMatrix>(
      num_rows, num_cols, num_nonzeros);
  auto crs_cols = crs_matrix->mutable_cols();
  auto crs_rows = crs_matrix->mutable_rows();
  int value_offset = 0;
  const int num_row_blocks = block_structure->rows.size();
  const auto& cols = block_structure->cols;
  *crs_rows++ = 0;
  for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) {
    const auto& row_block = block_structure->rows[row_block_id];
    // An empty row block only requires setting the row offsets.
    if (row_block.cells.empty()) {
      std::fill(crs_rows, crs_rows + row_block.block.size, value_offset);
      crs_rows += row_block.block.size;
      continue;
    }

    int row_nnz = 0;
    if constexpr (transpose) {
      // A transposed block structure arrives with the per-row-block nnz
      // already filled in.
      row_nnz = row_block.nnz / row_block.block.size;
    } else {
      // The nnz field of a non-transposed block structure is not filled in,
      // and its cells may be stored non-contiguously (consider the Jacobian
      // used by the Schur-complement solver, where the E and F blocks are
      // stored separately).
      for (auto& c : row_block.cells) {
        row_nnz += cols[c.block_id].size;
      }
    }

    // Row-wise setup of the matrix structure.
    for (int row = 0; row < row_block.block.size; ++row) {
      value_offset += row_nnz;
      *crs_rows++ = value_offset;
      for (auto& c : row_block.cells) {
        const int col_block_size = cols[c.block_id].size;
        const int col_position = cols[c.block_id].position;
        std::iota(crs_cols, crs_cols + col_block_size, col_position);
        crs_cols += col_block_size;
      }
    }
  }
  return crs_matrix;
}

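// Copies values from the block-sparse layout into the CRS values array, one
// cell at a time. When transpose == true the routine walks the transposed
// block structure: each cell still points at the original (row-major) value
// block, so it is mapped as a col_block_size x row_block_size matrix and
// transposed on copy.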
template <bool transpose>
void UpdateCompressedRowSparseMatrixImpl(
    CompressedRowSparseMatrix* crs_matrix,
    const double* values,
    const CompressedRowBlockStructure* block_structure) {
  auto crs_values = crs_matrix->mutable_values();
  auto crs_rows = crs_matrix->mutable_rows();
  const int num_row_blocks = block_structure->rows.size();
  const auto& cols = block_structure->cols;
  for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) {
    const auto& row_block = block_structure->rows[row_block_id];
    const int row_block_size = row_block.block.size;
    const int row_nnz = crs_rows[1] - crs_rows[0];
    crs_rows += row_block_size;

    if (row_nnz == 0) {
      continue;
    }

    MatrixRef crs_row_block(crs_values, row_block_size, row_nnz);
    int col_offset = 0;
    for (auto& c : row_block.cells) {
      const int col_block_size = cols[c.block_id].size;
      auto crs_cell =
          crs_row_block.block(0, col_offset, row_block_size, col_block_size);
      if constexpr (transpose) {
        // The transposed matrix is filled using the transposed block
        // structure.
        ConstMatrixRef cell(
            values + c.position, col_block_size, row_block_size);
        crs_cell = cell.transpose();
      } else {
        ConstMatrixRef cell(
            values + c.position, row_block_size, col_block_size);
        crs_cell = cell;
      }
      col_offset += col_block_size;
    }
    crs_values += row_nnz * row_block_size;
  }
}

void SetBlockStructureOfCompressedRowSparseMatrix(
    CompressedRowSparseMatrix* crs_matrix,
    CompressedRowBlockStructure* block_structure) {
  const int num_row_blocks = block_structure->rows.size();
  auto& row_blocks = *crs_matrix->mutable_row_blocks();
  row_blocks.resize(num_row_blocks);
  for (int i = 0; i < num_row_blocks; ++i) {
    row_blocks[i] = block_structure->rows[i].block;
  }

  auto& col_blocks = *crs_matrix->mutable_col_blocks();
  col_blocks = block_structure->cols;
}

}  // namespace

BlockSparseMatrix::BlockSparseMatrix(
    CompressedRowBlockStructure* block_structure, bool use_page_locked_memory)
    : use_page_locked_memory_(use_page_locked_memory),
      num_rows_(0),
      num_cols_(0),
      num_nonzeros_(0),
      block_structure_(block_structure) {
  CHECK(block_structure_ != nullptr);

  // Count the number of columns in the matrix.
  for (auto& col : block_structure_->cols) {
    num_cols_ += col.size;
  }

  // Count the number of non-zero entries and the number of rows in
  // the matrix.
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_size = block_structure_->rows[i].block.size;
    num_rows_ += row_block_size;

    const std::vector<Cell>& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      num_nonzeros_ += col_block_size * row_block_size;
    }
  }

  CHECK_GE(num_rows_, 0);
  CHECK_GE(num_cols_, 0);
  CHECK_GE(num_nonzeros_, 0);
  VLOG(2) << "Allocating values array with " << num_nonzeros_ * sizeof(double)
          << " bytes.";  // NOLINT

  values_ = AllocateValues(num_nonzeros_);
  max_num_nonzeros_ = num_nonzeros_;
  CHECK(values_ != nullptr);
  AddTransposeBlockStructure();
}

BlockSparseMatrix::~BlockSparseMatrix() { FreeValues(values_); }

void BlockSparseMatrix::AddTransposeBlockStructure() {
  if (transpose_block_structure_ == nullptr) {
    transpose_block_structure_ = CreateTranspose(*block_structure_);
  }
}

void BlockSparseMatrix::SetZero() {
  std::fill(values_, values_ + num_nonzeros_, 0.0);
}

void BlockSparseMatrix::SetZero(ContextImpl* context, int num_threads) {
  ParallelSetZero(context, num_threads, values_, num_nonzeros_);
}

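// Computes y += A * x. The single-threaded overload simply delegates to the
// parallel overload with a null context and a single thread.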
void BlockSparseMatrix::RightMultiplyAndAccumulate(const double* x,
                                                   double* y) const {
  RightMultiplyAndAccumulate(x, y, nullptr, 1);
}

void BlockSparseMatrix::RightMultiplyAndAccumulate(const double* x,
                                                   double* y,
                                                   ContextImpl* context,
                                                   int num_threads) const {
  CHECK(x != nullptr);
  CHECK(y != nullptr);

  const auto values = values_;
  const auto block_structure = block_structure_.get();
  const auto num_row_blocks = block_structure->rows.size();

  ParallelFor(context,
              0,
              num_row_blocks,
              num_threads,
              [values, block_structure, x, y](int row_block_id) {
                const int row_block_pos =
                    block_structure->rows[row_block_id].block.position;
                const int row_block_size =
                    block_structure->rows[row_block_id].block.size;
                const auto& cells = block_structure->rows[row_block_id].cells;
                for (const auto& cell : cells) {
                  const int col_block_id = cell.block_id;
                  const int col_block_size =
                      block_structure->cols[col_block_id].size;
                  const int col_block_pos =
                      block_structure->cols[col_block_id].position;
                  MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
                      values + cell.position,
                      row_block_size,
                      col_block_size,
                      x + col_block_pos,
                      y + row_block_pos);
                }
              });
}

// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
// might benefit from caching the column-block partition.
void BlockSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
                                                  double* y,
                                                  ContextImpl* context,
                                                  int num_threads) const {
  // While the transposed structure makes it possible to compute the left
  // product (y += A^T x) in parallel over column blocks, it also scatters the
  // accesses to the matrix elements. Thus, multiplication via the transposed
  // structure is only worthwhile for parallel execution.
  CHECK(x != nullptr);
  CHECK(y != nullptr);
  if (transpose_block_structure_ == nullptr || num_threads == 1) {
    LeftMultiplyAndAccumulate(x, y);
    return;
  }

  auto transpose_bs = transpose_block_structure_.get();
  const auto values = values_;
  const int num_col_blocks = transpose_bs->rows.size();
  if (!num_col_blocks) {
    return;
  }

  // Use the non-zero count as the iteration cost for the guided parallel-for
  // loop.
  ParallelFor(
      context,
      0,
      num_col_blocks,
      num_threads,
      [values, transpose_bs, x, y](int row_block_id) {
        int row_block_pos = transpose_bs->rows[row_block_id].block.position;
        int row_block_size = transpose_bs->rows[row_block_id].block.size;
        auto& cells = transpose_bs->rows[row_block_id].cells;

        for (auto& cell : cells) {
          const int col_block_id = cell.block_id;
          const int col_block_size = transpose_bs->cols[col_block_id].size;
          const int col_block_pos = transpose_bs->cols[col_block_id].position;
          MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
              values + cell.position,
              col_block_size,
              row_block_size,
              x + col_block_pos,
              y + row_block_pos);
        }
      },
      transpose_bs->rows.data(),
      [](const CompressedRow& row) { return row.cumulative_nnz; });
}

void BlockSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
                                                  double* y) const {
  CHECK(x != nullptr);
  CHECK(y != nullptr);
  // Single-threaded left products are always computed using the non-transposed
  // block structure, because it has a linear access pattern to the matrix
  // elements.
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_pos = block_structure_->rows[i].block.position;
    int row_block_size = block_structure_->rows[i].block.size;
    const auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
          values_ + cell.position,
          row_block_size,
          col_block_size,
          x + row_block_pos,
          y + col_block_pos);
    }
  }
}

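// Computes the squared L2 norm of every column of the matrix, i.e.
// x[c] = sum_r A(r, c)^2 for each column index c.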
void BlockSparseMatrix::SquaredColumnNorm(double* x) const {
  CHECK(x != nullptr);
  VectorRef(x, num_cols_).setZero();
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_size = block_structure_->rows[i].block.size;
    auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      const MatrixRef m(
          values_ + cell.position, row_block_size, col_block_size);
      VectorRef(x + col_block_pos, col_block_size) += m.colwise().squaredNorm();
    }
  }
}

// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
// might benefit from caching the column-block partition.
void BlockSparseMatrix::SquaredColumnNorm(double* x,
                                          ContextImpl* context,
                                          int num_threads) const {
  if (transpose_block_structure_ == nullptr || num_threads == 1) {
    SquaredColumnNorm(x);
    return;
  }

  CHECK(x != nullptr);
  ParallelSetZero(context, num_threads, x, num_cols_);

  auto transpose_bs = transpose_block_structure_.get();
  const auto values = values_;
  const int num_col_blocks = transpose_bs->rows.size();
  ParallelFor(
      context,
      0,
      num_col_blocks,
      num_threads,
      [values, transpose_bs, x](int row_block_id) {
        const auto& row = transpose_bs->rows[row_block_id];

        for (auto& cell : row.cells) {
          const auto& col = transpose_bs->cols[cell.block_id];
          const MatrixRef m(values + cell.position, col.size, row.block.size);
          VectorRef(x + row.block.position, row.block.size) +=
              m.colwise().squaredNorm();
        }
      },
      transpose_bs->rows.data(),
      [](const CompressedRow& row) { return row.cumulative_nnz; });
}

void BlockSparseMatrix::ScaleColumns(const double* scale) {
  CHECK(scale != nullptr);

  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_size = block_structure_->rows[i].block.size;
    auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      MatrixRef m(values_ + cell.position, row_block_size, col_block_size);
      m *= ConstVectorRef(scale + col_block_pos, col_block_size).asDiagonal();
    }
  }
}

// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
// might benefit from caching the column-block partition.
void BlockSparseMatrix::ScaleColumns(const double* scale,
                                     ContextImpl* context,
                                     int num_threads) {
  if (transpose_block_structure_ == nullptr || num_threads == 1) {
    ScaleColumns(scale);
    return;
  }

  CHECK(scale != nullptr);
  auto transpose_bs = transpose_block_structure_.get();
  auto values = values_;
  const int num_col_blocks = transpose_bs->rows.size();
  ParallelFor(
      context,
      0,
      num_col_blocks,
      num_threads,
      [values, transpose_bs, scale](int row_block_id) {
        const auto& row = transpose_bs->rows[row_block_id];

        for (auto& cell : row.cells) {
          const auto& col = transpose_bs->cols[cell.block_id];
          MatrixRef m(values + cell.position, col.size, row.block.size);
          m *= ConstVectorRef(scale + row.block.position, row.block.size)
                   .asDiagonal();
        }
      },
      transpose_bs->rows.data(),
      [](const CompressedRow& row) { return row.cumulative_nnz; });
}
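
// The conversion routines below materialize this matrix (and its transpose)
// in CompressedRowSparseMatrix form: the CRS structure is created once from
// the (transposed) block structure and the values are then copied in with the
// corresponding Update* method, which can also be called again later to
// refresh the values of an already created CRS matrix.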
std::unique_ptr<CompressedRowSparseMatrix>
BlockSparseMatrix::ToCompressedRowSparseMatrixTranspose() const {
  auto bs = transpose_block_structure_.get();
  auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<true>(
      num_cols_, num_rows_, num_nonzeros_, bs);

  SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(), bs);

  UpdateCompressedRowSparseMatrixTranspose(crs_matrix.get());
  return crs_matrix;
}

std::unique_ptr<CompressedRowSparseMatrix>
BlockSparseMatrix::ToCompressedRowSparseMatrix() const {
  auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<false>(
      num_rows_, num_cols_, num_nonzeros_, block_structure_.get());

  SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(),
                                               block_structure_.get());

  UpdateCompressedRowSparseMatrix(crs_matrix.get());
  return crs_matrix;
}

void BlockSparseMatrix::UpdateCompressedRowSparseMatrixTranspose(
    CompressedRowSparseMatrix* crs_matrix) const {
  CHECK(crs_matrix != nullptr);
  CHECK_EQ(crs_matrix->num_rows(), num_cols_);
  CHECK_EQ(crs_matrix->num_cols(), num_rows_);
  CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_);
  UpdateCompressedRowSparseMatrixImpl<true>(
      crs_matrix, values(), transpose_block_structure_.get());
}

void BlockSparseMatrix::UpdateCompressedRowSparseMatrix(
    CompressedRowSparseMatrix* crs_matrix) const {
  CHECK(crs_matrix != nullptr);
  CHECK_EQ(crs_matrix->num_rows(), num_rows_);
  CHECK_EQ(crs_matrix->num_cols(), num_cols_);
  CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_);
  UpdateCompressedRowSparseMatrixImpl<false>(
      crs_matrix, values(), block_structure_.get());
}

void BlockSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const {
  CHECK(dense_matrix != nullptr);

  dense_matrix->resize(num_rows_, num_cols_);
  dense_matrix->setZero();
  Matrix& m = *dense_matrix;

  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_pos = block_structure_->rows[i].block.position;
    int row_block_size = block_structure_->rows[i].block.size;
    auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      int jac_pos = cell.position;
      m.block(row_block_pos, col_block_pos, row_block_size, col_block_size) +=
          MatrixRef(values_ + jac_pos, row_block_size, col_block_size);
    }
  }
}

void BlockSparseMatrix::ToTripletSparseMatrix(
    TripletSparseMatrix* matrix) const {
  CHECK(matrix != nullptr);

  matrix->Reserve(num_nonzeros_);
  matrix->Resize(num_rows_, num_cols_);
  matrix->SetZero();

  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_pos = block_structure_->rows[i].block.position;
    int row_block_size = block_structure_->rows[i].block.size;
    const auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      int jac_pos = cell.position;
      for (int r = 0; r < row_block_size; ++r) {
        for (int c = 0; c < col_block_size; ++c, ++jac_pos) {
          matrix->mutable_rows()[jac_pos] = row_block_pos + r;
          matrix->mutable_cols()[jac_pos] = col_block_pos + c;
          matrix->mutable_values()[jac_pos] = values_[jac_pos];
        }
      }
    }
  }
  matrix->set_num_nonzeros(num_nonzeros_);
}

// Return a pointer to the block structure. We continue to hold
// ownership of the object though.
const CompressedRowBlockStructure* BlockSparseMatrix::block_structure() const {
  return block_structure_.get();
}

// Return a pointer to the block structure of the matrix transpose. We
// continue to hold ownership of the object though.
const CompressedRowBlockStructure*
BlockSparseMatrix::transpose_block_structure() const {
  return transpose_block_structure_.get();
}

void BlockSparseMatrix::ToTextFile(FILE* file) const {
  CHECK(file != nullptr);
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    const int row_block_pos = block_structure_->rows[i].block.position;
    const int row_block_size = block_structure_->rows[i].block.size;
    const auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      const int col_block_id = cell.block_id;
      const int col_block_size = block_structure_->cols[col_block_id].size;
      const int col_block_pos = block_structure_->cols[col_block_id].position;
      int jac_pos = cell.position;
      for (int r = 0; r < row_block_size; ++r) {
        for (int c = 0; c < col_block_size; ++c) {
          fprintf(file,
                  "% 10d % 10d %17f\n",
                  row_block_pos + r,
                  col_block_pos + c,
                  values_[jac_pos++]);
        }
      }
    }
  }
}

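// Builds a block-diagonal BlockSparseMatrix whose i-th diagonal block is a
// square diagonal matrix of size column_blocks[i].size, with entries read
// consecutively from `diagonal`. Illustrative example: for column blocks of
// sizes {2, 3} and diagonal = {d0, d1, d2, d3, d4}, the result is the 5x5
// matrix blkdiag(diag(d0, d1), diag(d2, d3, d4)).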
std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateDiagonalMatrix(
    const double* diagonal, const std::vector<Block>& column_blocks) {
  // Create the block structure for the diagonal matrix.
  auto* bs = new CompressedRowBlockStructure();
  bs->cols = column_blocks;
  int position = 0;
  bs->rows.resize(column_blocks.size(), CompressedRow(1));
  for (int i = 0; i < column_blocks.size(); ++i) {
    CompressedRow& row = bs->rows[i];
    row.block = column_blocks[i];
    Cell& cell = row.cells[0];
    cell.block_id = i;
    cell.position = position;
    position += row.block.size * row.block.size;
  }

  // Create the BlockSparseMatrix with the given block structure.
  auto matrix = std::make_unique<BlockSparseMatrix>(bs);
  matrix->SetZero();

  // Fill the values array of the block sparse matrix.
  double* values = matrix->mutable_values();
  for (const auto& column_block : column_blocks) {
    const int size = column_block.size;
    for (int j = 0; j < size; ++j) {
      // j * (size + 1) is a compact way of accessing the (j, j) entry of a
      // row-major size x size block.
      values[j * (size + 1)] = diagonal[j];
    }
    diagonal += size;
    values += size * size;
  }

  return matrix;
}

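// Appends the row blocks of m below the rows of this matrix. m must share
// this matrix's column-block layout. The values array is reallocated if the
// appended blocks do not fit, and the transposed block structure (if present)
// is updated incrementally so that it stays consistent.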
void BlockSparseMatrix::AppendRows(const BlockSparseMatrix& m) {
  CHECK_EQ(m.num_cols(), num_cols());
  const CompressedRowBlockStructure* m_bs = m.block_structure();
  CHECK_EQ(m_bs->cols.size(), block_structure_->cols.size());

  const int old_num_nonzeros = num_nonzeros_;
  const int old_num_row_blocks = block_structure_->rows.size();
  block_structure_->rows.resize(old_num_row_blocks + m_bs->rows.size());

  for (int i = 0; i < m_bs->rows.size(); ++i) {
    const CompressedRow& m_row = m_bs->rows[i];
    const int row_block_id = old_num_row_blocks + i;
    CompressedRow& row = block_structure_->rows[row_block_id];
    row.block.size = m_row.block.size;
    row.block.position = num_rows_;
    num_rows_ += m_row.block.size;
    row.cells.resize(m_row.cells.size());
    if (transpose_block_structure_) {
      transpose_block_structure_->cols.emplace_back(row.block);
    }
    for (int c = 0; c < m_row.cells.size(); ++c) {
      const int block_id = m_row.cells[c].block_id;
      row.cells[c].block_id = block_id;
      row.cells[c].position = num_nonzeros_;

      const int cell_nnz = m_row.block.size * m_bs->cols[block_id].size;
      if (transpose_block_structure_) {
        transpose_block_structure_->rows[block_id].cells.emplace_back(
            row_block_id, num_nonzeros_);
        transpose_block_structure_->rows[block_id].nnz += cell_nnz;
      }

      num_nonzeros_ += cell_nnz;
    }
  }

  if (num_nonzeros_ > max_num_nonzeros_) {
    double* old_values = values_;
    values_ = AllocateValues(num_nonzeros_);
    std::copy_n(old_values, old_num_nonzeros, values_);
    max_num_nonzeros_ = num_nonzeros_;
    FreeValues(old_values);
  }

  std::copy(
      m.values(), m.values() + m.num_nonzeros(), values_ + old_num_nonzeros);

  if (transpose_block_structure_ == nullptr) {
    return;
  }
  ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows);
}

void BlockSparseMatrix::DeleteRowBlocks(const int delta_row_blocks) {
  const int num_row_blocks = block_structure_->rows.size();
  const int new_num_row_blocks = num_row_blocks - delta_row_blocks;
  int delta_num_nonzeros = 0;
  int delta_num_rows = 0;
  const std::vector<Block>& column_blocks = block_structure_->cols;
  for (int i = 0; i < delta_row_blocks; ++i) {
    const CompressedRow& row = block_structure_->rows[num_row_blocks - i - 1];
    delta_num_rows += row.block.size;
    for (int c = 0; c < row.cells.size(); ++c) {
      const Cell& cell = row.cells[c];
      delta_num_nonzeros += row.block.size * column_blocks[cell.block_id].size;

      if (transpose_block_structure_) {
        auto& col_cells = transpose_block_structure_->rows[cell.block_id].cells;
        while (!col_cells.empty() &&
               col_cells.back().block_id >= new_num_row_blocks) {
          const int del_block_id = col_cells.back().block_id;
          const int del_block_rows =
              block_structure_->rows[del_block_id].block.size;
          const int del_block_cols = column_blocks[cell.block_id].size;
          const int del_cell_nnz = del_block_rows * del_block_cols;
          transpose_block_structure_->rows[cell.block_id].nnz -= del_cell_nnz;
          col_cells.pop_back();
        }
      }
    }
  }
  num_nonzeros_ -= delta_num_nonzeros;
  num_rows_ -= delta_num_rows;
  block_structure_->rows.resize(new_num_row_blocks);

  if (transpose_block_structure_ == nullptr) {
    return;
  }
  for (int i = 0; i < delta_row_blocks; ++i) {
    transpose_block_structure_->cols.pop_back();
  }

  ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows);
}

std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateRandomMatrix(
    const BlockSparseMatrix::RandomMatrixOptions& options,
    std::mt19937& prng,
    bool use_page_locked_memory) {
  CHECK_GT(options.num_row_blocks, 0);
  CHECK_GT(options.min_row_block_size, 0);
  CHECK_GT(options.max_row_block_size, 0);
  CHECK_LE(options.min_row_block_size, options.max_row_block_size);
  CHECK_GT(options.block_density, 0.0);
  CHECK_LE(options.block_density, 1.0);

  std::uniform_int_distribution<int> col_distribution(
      options.min_col_block_size, options.max_col_block_size);
  std::uniform_int_distribution<int> row_distribution(
      options.min_row_block_size, options.max_row_block_size);
  auto bs = std::make_unique<CompressedRowBlockStructure>();
  if (options.col_blocks.empty()) {
    CHECK_GT(options.num_col_blocks, 0);
    CHECK_GT(options.min_col_block_size, 0);
    CHECK_GT(options.max_col_block_size, 0);
    CHECK_LE(options.min_col_block_size, options.max_col_block_size);

    // Generate the col block structure.
    int col_block_position = 0;
    for (int i = 0; i < options.num_col_blocks; ++i) {
      const int col_block_size = col_distribution(prng);
      bs->cols.emplace_back(col_block_size, col_block_position);
      col_block_position += col_block_size;
    }
  } else {
    bs->cols = options.col_blocks;
  }

  bool matrix_has_blocks = false;
  std::uniform_real_distribution<double> uniform01(0.0, 1.0);
  while (!matrix_has_blocks) {
    VLOG(1) << "Clearing";
    bs->rows.clear();
    int row_block_position = 0;
    int value_position = 0;
    for (int r = 0; r < options.num_row_blocks; ++r) {
      const int row_block_size = row_distribution(prng);
      bs->rows.emplace_back();
      CompressedRow& row = bs->rows.back();
      row.block.size = row_block_size;
      row.block.position = row_block_position;
      row_block_position += row_block_size;
      for (int c = 0; c < bs->cols.size(); ++c) {
        if (uniform01(prng) > options.block_density) continue;

        row.cells.emplace_back();
        Cell& cell = row.cells.back();
        cell.block_id = c;
        cell.position = value_position;
        value_position += row_block_size * bs->cols[c].size;
        matrix_has_blocks = true;
      }
    }
  }

  auto matrix = std::make_unique<BlockSparseMatrix>(bs.release(),
                                                    use_page_locked_memory);
  double* values = matrix->mutable_values();
  std::normal_distribution<double> standard_normal_distribution;
  std::generate_n(
      values, matrix->num_nonzeros(), [&standard_normal_distribution, &prng] {
        return standard_normal_distribution(prng);
      });

  return matrix;
}

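// Builds the block structure of the transpose: every column block of bs
// becomes a row block of the transpose and vice versa, and each cell keeps
// the value position of the original cell. The transposed structure therefore
// indexes the same values array as the original matrix; readers must
// interpret each cell block as the transpose of the original (row-major)
// block, as done in UpdateCompressedRowSparseMatrixImpl<true> above.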
std::unique_ptr<CompressedRowBlockStructure> CreateTranspose(
    const CompressedRowBlockStructure& bs) {
  auto transpose = std::make_unique<CompressedRowBlockStructure>();

  transpose->rows.resize(bs.cols.size());
  for (int i = 0; i < bs.cols.size(); ++i) {
    transpose->rows[i].block = bs.cols[i];
    transpose->rows[i].nnz = 0;
  }

  transpose->cols.resize(bs.rows.size());
  for (int i = 0; i < bs.rows.size(); ++i) {
    auto& row = bs.rows[i];
    transpose->cols[i] = row.block;

    const int nrows = row.block.size;
    for (auto& cell : row.cells) {
      transpose->rows[cell.block_id].cells.emplace_back(i, cell.position);
      const int ncols = transpose->rows[cell.block_id].block.size;
      transpose->rows[cell.block_id].nnz += nrows * ncols;
    }
  }
  ComputeCumulativeNumberOfNonZeros(transpose->rows);
  return transpose;
}

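// When page-locked memory is requested (and CUDA is available), the values
// array is allocated with cudaHostAlloc. Pinned host memory cannot be paged
// out, which lets the CUDA runtime use faster asynchronous DMA transfers when
// copying the values to and from the GPU.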
double* BlockSparseMatrix::AllocateValues(int size) {
  if (!use_page_locked_memory_) {
    return new double[size];
  }

#ifndef CERES_NO_CUDA

  double* values = nullptr;
  CHECK_EQ(cudaSuccess,
           cudaHostAlloc(&values, sizeof(double) * size, cudaHostAllocDefault));
  return values;
#else
  LOG(FATAL) << "Page locked memory requested when CUDA is not available. "
             << "This is a Ceres bug; please contact the developers!";
  return nullptr;
#endif
}

void BlockSparseMatrix::FreeValues(double*& values) {
  if (!use_page_locked_memory_) {
    delete[] values;
    values = nullptr;
    return;
  }

#ifndef CERES_NO_CUDA
  CHECK_EQ(cudaSuccess, cudaFreeHost(values));
  values = nullptr;
#else
  LOG(FATAL) << "Page locked memory requested when CUDA is not available. "
             << "This is a Ceres bug; please contact the developers!";
#endif
}

}  // namespace ceres::internal