// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)

#include "ceres/block_sparse_matrix.h"

#include <algorithm>
#include <cstddef>
#include <memory>
#include <numeric>
#include <random>
#include <vector>

#include "ceres/block_structure.h"
#include "ceres/crs_matrix.h"
#include "ceres/internal/eigen.h"
#include "ceres/parallel_for.h"
#include "ceres/parallel_vector_ops.h"
#include "ceres/small_blas.h"
#include "ceres/triplet_sparse_matrix.h"
#include "glog/logging.h"

#ifndef CERES_NO_CUDA
#include "cuda_runtime.h"
#endif

namespace ceres::internal {

namespace {
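// Computes the prefix sum of per-row-block non-zero counts. The cumulative
// counts are used below as per-iteration cost estimates when partitioning
// guided ParallelFor loops over the transposed block structure.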
void ComputeCumulativeNumberOfNonZeros(std::vector<CompressedList>& rows) {
  if (rows.empty()) {
    return;
  }
  rows[0].cumulative_nnz = rows[0].nnz;
  for (int c = 1; c < rows.size(); ++c) {
    const int curr_nnz = rows[c].nnz;
    rows[c].cumulative_nnz = curr_nnz + rows[c - 1].cumulative_nnz;
  }
}

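// Builds the row and column index arrays (but not the values) of a CRS matrix
// with the same scalar non-zero layout as the given block structure. When
// transpose is true, the block structure is assumed to be a transposed one,
// with the nnz field of every row block already filled in.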
template <bool transpose>
std::unique_ptr<CompressedRowSparseMatrix>
CreateStructureOfCompressedRowSparseMatrix(
    int num_rows,
    int num_cols,
    int num_nonzeros,
    const CompressedRowBlockStructure* block_structure) {
  auto crs_matrix = std::make_unique<CompressedRowSparseMatrix>(
      num_rows, num_cols, num_nonzeros);
  auto crs_cols = crs_matrix->mutable_cols();
  auto crs_rows = crs_matrix->mutable_rows();
  int value_offset = 0;
  const int num_row_blocks = block_structure->rows.size();
  const auto& cols = block_structure->cols;
  *crs_rows++ = 0;
  for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) {
    const auto& row_block = block_structure->rows[row_block_id];
    // Empty row block: only requires setting row offsets.
    if (row_block.cells.empty()) {
      std::fill(crs_rows, crs_rows + row_block.block.size, value_offset);
      crs_rows += row_block.block.size;
      continue;
    }

    int row_nnz = 0;
    if constexpr (transpose) {
      // A transposed block structure comes with the nnz of each row block
      // filled in.
      row_nnz = row_block.nnz / row_block.block.size;
    } else {
      // The nnz field of a non-transposed block structure is not filled in,
      // and its cells can be laid out non-sequentially (consider the Jacobian
      // of a Schur-complement solver: the E and F blocks are stored
      // separately).
      for (auto& c : row_block.cells) {
        row_nnz += cols[c.block_id].size;
      }
    }

    // Row-wise setup of the matrix structure.
    for (int row = 0; row < row_block.block.size; ++row) {
      value_offset += row_nnz;
      *crs_rows++ = value_offset;
      for (auto& c : row_block.cells) {
        const int col_block_size = cols[c.block_id].size;
        const int col_position = cols[c.block_id].position;
        std::iota(crs_cols, crs_cols + col_block_size, col_position);
        crs_cols += col_block_size;
      }
    }
  }
  return crs_matrix;
}

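// Copies the values of a block-sparse matrix into a CRS matrix whose
// structure was produced by CreateStructureOfCompressedRowSparseMatrix above.
// When transpose is true, every cell is transposed on the fly as it is
// copied.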
template <bool transpose>
void UpdateCompressedRowSparseMatrixImpl(
    CompressedRowSparseMatrix* crs_matrix,
    const double* values,
    const CompressedRowBlockStructure* block_structure) {
  auto crs_values = crs_matrix->mutable_values();
  auto crs_rows = crs_matrix->mutable_rows();
  const int num_row_blocks = block_structure->rows.size();
  const auto& cols = block_structure->cols;
  for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) {
    const auto& row_block = block_structure->rows[row_block_id];
    const int row_block_size = row_block.block.size;
    const int row_nnz = crs_rows[1] - crs_rows[0];
    crs_rows += row_block_size;

    if (row_nnz == 0) {
      continue;
    }

    MatrixRef crs_row_block(crs_values, row_block_size, row_nnz);
    int col_offset = 0;
    for (auto& c : row_block.cells) {
      const int col_block_size = cols[c.block_id].size;
      auto crs_cell =
          crs_row_block.block(0, col_offset, row_block_size, col_block_size);
      if constexpr (transpose) {
        // The transposed matrix is filled using the transposed block
        // structure.
        ConstMatrixRef cell(
            values + c.position, col_block_size, row_block_size);
        crs_cell = cell.transpose();
      } else {
        ConstMatrixRef cell(
            values + c.position, row_block_size, col_block_size);
        crs_cell = cell;
      }
      col_offset += col_block_size;
    }
    crs_values += row_nnz * row_block_size;
  }
}

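// Copies the row- and column-block partitioning of the block structure into
// the CRS matrix, so that consumers of the CRS matrix can recover the block
// layout.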
void SetBlockStructureOfCompressedRowSparseMatrix(
    CompressedRowSparseMatrix* crs_matrix,
    CompressedRowBlockStructure* block_structure) {
  const int num_row_blocks = block_structure->rows.size();
  auto& row_blocks = *crs_matrix->mutable_row_blocks();
  row_blocks.resize(num_row_blocks);
  for (int i = 0; i < num_row_blocks; ++i) {
    row_blocks[i] = block_structure->rows[i].block;
  }

  auto& col_blocks = *crs_matrix->mutable_col_blocks();
  col_blocks = block_structure->cols;
}

}  // namespace

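// The matrix takes ownership of block_structure and of its values array. If
// use_page_locked_memory is true, the values are allocated as page-locked
// (pinned) host memory (see AllocateValues below) so that CUDA transfers do
// not have to stage through pageable memory.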
BlockSparseMatrix::BlockSparseMatrix(
    CompressedRowBlockStructure* block_structure, bool use_page_locked_memory)
    : use_page_locked_memory_(use_page_locked_memory),
      num_rows_(0),
      num_cols_(0),
      num_nonzeros_(0),
      block_structure_(block_structure) {
  CHECK(block_structure_ != nullptr);

  // Count the number of columns in the matrix.
  for (auto& col : block_structure_->cols) {
    num_cols_ += col.size;
  }

  // Count the number of non-zero entries and the number of rows in
  // the matrix.
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_size = block_structure_->rows[i].block.size;
    num_rows_ += row_block_size;

    const std::vector<Cell>& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      num_nonzeros_ += col_block_size * row_block_size;
    }
  }

  CHECK_GE(num_rows_, 0);
  CHECK_GE(num_cols_, 0);
  CHECK_GE(num_nonzeros_, 0);
  VLOG(2) << "Allocating values array with " << num_nonzeros_ * sizeof(double)
          << " bytes.";  // NOLINT

  values_ = AllocateValues(num_nonzeros_);
  max_num_nonzeros_ = num_nonzeros_;
  CHECK(values_ != nullptr);
  AddTransposeBlockStructure();
}

BlockSparseMatrix::~BlockSparseMatrix() { FreeValues(values_); }

void BlockSparseMatrix::AddTransposeBlockStructure() {
  if (transpose_block_structure_ == nullptr) {
    transpose_block_structure_ = CreateTranspose(*block_structure_);
  }
}

void BlockSparseMatrix::SetZero() {
  std::fill(values_, values_ + num_nonzeros_, 0.0);
}

void BlockSparseMatrix::SetZero(ContextImpl* context, int num_threads) {
  ParallelSetZero(context, num_threads, values_, num_nonzeros_);
}

void BlockSparseMatrix::RightMultiplyAndAccumulate(const double* x,
                                                   double* y) const {
  RightMultiplyAndAccumulate(x, y, nullptr, 1);
}

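// Row blocks are independent of each other: each scalar row of y is written
// by exactly one row block, so the row blocks can be partitioned across
// threads without any synchronization.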
void BlockSparseMatrix::RightMultiplyAndAccumulate(const double* x,
                                                   double* y,
                                                   ContextImpl* context,
                                                   int num_threads) const {
  CHECK(x != nullptr);
  CHECK(y != nullptr);

  const auto values = values_;
  const auto block_structure = block_structure_.get();
  const auto num_row_blocks = block_structure->rows.size();

  ParallelFor(context,
              0,
              num_row_blocks,
              num_threads,
              [values, block_structure, x, y](int row_block_id) {
                const int row_block_pos =
                    block_structure->rows[row_block_id].block.position;
                const int row_block_size =
                    block_structure->rows[row_block_id].block.size;
                const auto& cells = block_structure->rows[row_block_id].cells;
                for (const auto& cell : cells) {
                  const int col_block_id = cell.block_id;
                  const int col_block_size =
                      block_structure->cols[col_block_id].size;
                  const int col_block_pos =
                      block_structure->cols[col_block_id].position;
                  MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
                      values + cell.position,
                      row_block_size,
                      col_block_size,
                      x + col_block_pos,
                      y + row_block_pos);
                }
              });
}

// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
// might benefit from caching the column-block partition.
void BlockSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
                                                  double* y,
                                                  ContextImpl* context,
                                                  int num_threads) const {
  // While the transposed structure makes it possible to compute the
  // left-multiplication by a dense vector in parallel, it scatters the access
  // pattern to the matrix elements. Thus, multiplication via the transposed
  // structure is only worthwhile when executing in parallel.
  CHECK(x != nullptr);
  CHECK(y != nullptr);
  if (transpose_block_structure_ == nullptr || num_threads == 1) {
    LeftMultiplyAndAccumulate(x, y);
    return;
  }

  auto transpose_bs = transpose_block_structure_.get();
  const auto values = values_;
  const int num_col_blocks = transpose_bs->rows.size();
  if (!num_col_blocks) {
    return;
  }

  // Use the non-zero count as the iteration cost for the guided parallel-for
  // loop.
  ParallelFor(
      context,
      0,
      num_col_blocks,
      num_threads,
      [values, transpose_bs, x, y](int row_block_id) {
        int row_block_pos = transpose_bs->rows[row_block_id].block.position;
        int row_block_size = transpose_bs->rows[row_block_id].block.size;
        auto& cells = transpose_bs->rows[row_block_id].cells;

        for (auto& cell : cells) {
          const int col_block_id = cell.block_id;
          const int col_block_size = transpose_bs->cols[col_block_id].size;
          const int col_block_pos = transpose_bs->cols[col_block_id].position;
          MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
              values + cell.position,
              col_block_size,
              row_block_size,
              x + col_block_pos,
              y + row_block_pos);
        }
      },
      transpose_bs->rows.data(),
      [](const CompressedRow& row) { return row.cumulative_nnz; });
}

void BlockSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
                                                  double* y) const {
  CHECK(x != nullptr);
  CHECK(y != nullptr);
  // Single-threaded left products are always computed using the
  // non-transposed block structure, because it has a linear access pattern to
  // the matrix elements.
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_pos = block_structure_->rows[i].block.position;
    int row_block_size = block_structure_->rows[i].block.size;
    const auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
          values_ + cell.position,
          row_block_size,
          col_block_size,
          x + row_block_pos,
          y + col_block_pos);
    }
  }
}

void BlockSparseMatrix::SquaredColumnNorm(double* x) const {
  CHECK(x != nullptr);
  VectorRef(x, num_cols_).setZero();
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_size = block_structure_->rows[i].block.size;
    auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      const MatrixRef m(
          values_ + cell.position, row_block_size, col_block_size);
      VectorRef(x + col_block_pos, col_block_size) += m.colwise().squaredNorm();
    }
  }
}

// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
// might benefit from caching the column-block partition.
void BlockSparseMatrix::SquaredColumnNorm(double* x,
                                          ContextImpl* context,
                                          int num_threads) const {
  if (transpose_block_structure_ == nullptr || num_threads == 1) {
    SquaredColumnNorm(x);
    return;
  }

  CHECK(x != nullptr);
  ParallelSetZero(context, num_threads, x, num_cols_);

  auto transpose_bs = transpose_block_structure_.get();
  const auto values = values_;
  const int num_col_blocks = transpose_bs->rows.size();
  ParallelFor(
      context,
      0,
      num_col_blocks,
      num_threads,
      [values, transpose_bs, x](int row_block_id) {
        const auto& row = transpose_bs->rows[row_block_id];

        for (auto& cell : row.cells) {
          const auto& col = transpose_bs->cols[cell.block_id];
          const MatrixRef m(values + cell.position, col.size, row.block.size);
          VectorRef(x + row.block.position, row.block.size) +=
              m.colwise().squaredNorm();
        }
      },
      transpose_bs->rows.data(),
      [](const CompressedRow& row) { return row.cumulative_nnz; });
}

void BlockSparseMatrix::ScaleColumns(const double* scale) {
  CHECK(scale != nullptr);

  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_size = block_structure_->rows[i].block.size;
    auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      MatrixRef m(values_ + cell.position, row_block_size, col_block_size);
      m *= ConstVectorRef(scale + col_block_pos, col_block_size).asDiagonal();
    }
  }
}

// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
// might benefit from caching the column-block partition.
void BlockSparseMatrix::ScaleColumns(const double* scale,
                                     ContextImpl* context,
                                     int num_threads) {
  if (transpose_block_structure_ == nullptr || num_threads == 1) {
    ScaleColumns(scale);
    return;
  }

  CHECK(scale != nullptr);
  auto transpose_bs = transpose_block_structure_.get();
  auto values = values_;
  const int num_col_blocks = transpose_bs->rows.size();
  ParallelFor(
      context,
      0,
      num_col_blocks,
      num_threads,
      [values, transpose_bs, scale](int row_block_id) {
        const auto& row = transpose_bs->rows[row_block_id];

        for (auto& cell : row.cells) {
          const auto& col = transpose_bs->cols[cell.block_id];
          MatrixRef m(values + cell.position, col.size, row.block.size);
          m *= ConstVectorRef(scale + row.block.position, row.block.size)
                   .asDiagonal();
        }
      },
      transpose_bs->rows.data(),
      [](const CompressedRow& row) { return row.cumulative_nnz; });
}

std::unique_ptr<CompressedRowSparseMatrix>
BlockSparseMatrix::ToCompressedRowSparseMatrixTranspose() const {
  auto bs = transpose_block_structure_.get();
  auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<true>(
      num_cols_, num_rows_, num_nonzeros_, bs);

  SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(), bs);

  UpdateCompressedRowSparseMatrixTranspose(crs_matrix.get());
  return crs_matrix;
}

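// A minimal usage sketch (names are illustrative): create the CRS view once,
// then refresh only its values as the block-sparse values change:
//
//   auto crs = A.ToCompressedRowSparseMatrix();    // structure + values
//   ...                                            // A's values change
//   A.UpdateCompressedRowSparseMatrix(crs.get());  // copies values only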
std::unique_ptr<CompressedRowSparseMatrix>
BlockSparseMatrix::ToCompressedRowSparseMatrix() const {
  auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<false>(
      num_rows_, num_cols_, num_nonzeros_, block_structure_.get());

  SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(),
                                               block_structure_.get());

  UpdateCompressedRowSparseMatrix(crs_matrix.get());
  return crs_matrix;
}

void BlockSparseMatrix::UpdateCompressedRowSparseMatrixTranspose(
    CompressedRowSparseMatrix* crs_matrix) const {
  CHECK(crs_matrix != nullptr);
  CHECK_EQ(crs_matrix->num_rows(), num_cols_);
  CHECK_EQ(crs_matrix->num_cols(), num_rows_);
  CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_);
  UpdateCompressedRowSparseMatrixImpl<true>(
      crs_matrix, values(), transpose_block_structure_.get());
}

void BlockSparseMatrix::UpdateCompressedRowSparseMatrix(
    CompressedRowSparseMatrix* crs_matrix) const {
  CHECK(crs_matrix != nullptr);
  CHECK_EQ(crs_matrix->num_rows(), num_rows_);
  CHECK_EQ(crs_matrix->num_cols(), num_cols_);
  CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_);
  UpdateCompressedRowSparseMatrixImpl<false>(
      crs_matrix, values(), block_structure_.get());
}

void BlockSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const {
  CHECK(dense_matrix != nullptr);

  dense_matrix->resize(num_rows_, num_cols_);
  dense_matrix->setZero();
  Matrix& m = *dense_matrix;

  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_pos = block_structure_->rows[i].block.position;
    int row_block_size = block_structure_->rows[i].block.size;
    auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      int jac_pos = cell.position;
      m.block(row_block_pos, col_block_pos, row_block_size, col_block_size) +=
          MatrixRef(values_ + jac_pos, row_block_size, col_block_size);
    }
  }
}

void BlockSparseMatrix::ToTripletSparseMatrix(
    TripletSparseMatrix* matrix) const {
  CHECK(matrix != nullptr);

  matrix->Reserve(num_nonzeros_);
  matrix->Resize(num_rows_, num_cols_);
  matrix->SetZero();

  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_pos = block_structure_->rows[i].block.position;
    int row_block_size = block_structure_->rows[i].block.size;
    const auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      int jac_pos = cell.position;
      for (int r = 0; r < row_block_size; ++r) {
        for (int c = 0; c < col_block_size; ++c, ++jac_pos) {
          matrix->mutable_rows()[jac_pos] = row_block_pos + r;
          matrix->mutable_cols()[jac_pos] = col_block_pos + c;
          matrix->mutable_values()[jac_pos] = values_[jac_pos];
        }
      }
    }
  }
  matrix->set_num_nonzeros(num_nonzeros_);
}

// Return a pointer to the block structure. We continue to hold
// ownership of the object though.
const CompressedRowBlockStructure* BlockSparseMatrix::block_structure() const {
  return block_structure_.get();
}

// Return a pointer to the block structure of the matrix transpose. We
// continue to hold ownership of the object though.
const CompressedRowBlockStructure*
BlockSparseMatrix::transpose_block_structure() const {
  return transpose_block_structure_.get();
}

void BlockSparseMatrix::ToTextFile(FILE* file) const {
  CHECK(file != nullptr);
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    const int row_block_pos = block_structure_->rows[i].block.position;
    const int row_block_size = block_structure_->rows[i].block.size;
    const auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      const int col_block_id = cell.block_id;
      const int col_block_size = block_structure_->cols[col_block_id].size;
      const int col_block_pos = block_structure_->cols[col_block_id].position;
      int jac_pos = cell.position;
      for (int r = 0; r < row_block_size; ++r) {
        for (int c = 0; c < col_block_size; ++c) {
          fprintf(file,
                  "% 10d % 10d %17f\n",
                  row_block_pos + r,
                  col_block_pos + c,
                  values_[jac_pos++]);
        }
      }
    }
  }
}

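// A minimal usage sketch (values are illustrative): build a 5x5 block
// diagonal matrix D from a dense diagonal with blocks of size 2 and 3:
//
//   std::vector<Block> blocks = {Block(2, 0), Block(3, 2)};
//   Vector diagonal(5);
//   diagonal.setOnes();
//   auto D = BlockSparseMatrix::CreateDiagonalMatrix(diagonal.data(), blocks);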
std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateDiagonalMatrix(
    const double* diagonal, const std::vector<Block>& column_blocks) {
  // Create the block structure for the diagonal matrix.
  auto* bs = new CompressedRowBlockStructure();
  bs->cols = column_blocks;
  int position = 0;
  bs->rows.resize(column_blocks.size(), CompressedRow(1));
  for (int i = 0; i < column_blocks.size(); ++i) {
    CompressedRow& row = bs->rows[i];
    row.block = column_blocks[i];
    Cell& cell = row.cells[0];
    cell.block_id = i;
    cell.position = position;
    position += row.block.size * row.block.size;
  }

  // Create the BlockSparseMatrix with the given block structure.
  auto matrix = std::make_unique<BlockSparseMatrix>(bs);
  matrix->SetZero();

  // Fill the values array of the block sparse matrix.
  double* values = matrix->mutable_values();
  for (const auto& column_block : column_blocks) {
    const int size = column_block.size;
    for (int j = 0; j < size; ++j) {
      // j * (size + 1) is a compact way of accessing the (j, j) entry of a
      // row-major size x size block.
      values[j * (size + 1)] = diagonal[j];
    }
    diagonal += size;
    values += size * size;
  }

  return matrix;
}

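// Appends the rows of m below the existing rows of this matrix. If a
// transpose block structure is present it is updated incrementally: every
// appended cell is also registered in its column's transpose row, and the
// cumulative non-zero counts are recomputed at the end.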
void BlockSparseMatrix::AppendRows(const BlockSparseMatrix& m) {
  CHECK_EQ(m.num_cols(), num_cols());
  const CompressedRowBlockStructure* m_bs = m.block_structure();
  CHECK_EQ(m_bs->cols.size(), block_structure_->cols.size());

  const int old_num_nonzeros = num_nonzeros_;
  const int old_num_row_blocks = block_structure_->rows.size();
  block_structure_->rows.resize(old_num_row_blocks + m_bs->rows.size());

  for (int i = 0; i < m_bs->rows.size(); ++i) {
    const CompressedRow& m_row = m_bs->rows[i];
    const int row_block_id = old_num_row_blocks + i;
    CompressedRow& row = block_structure_->rows[row_block_id];
    row.block.size = m_row.block.size;
    row.block.position = num_rows_;
    num_rows_ += m_row.block.size;
    row.cells.resize(m_row.cells.size());
    if (transpose_block_structure_) {
      transpose_block_structure_->cols.emplace_back(row.block);
    }
    for (int c = 0; c < m_row.cells.size(); ++c) {
      const int block_id = m_row.cells[c].block_id;
      row.cells[c].block_id = block_id;
      row.cells[c].position = num_nonzeros_;

      const int cell_nnz = m_row.block.size * m_bs->cols[block_id].size;
      if (transpose_block_structure_) {
        transpose_block_structure_->rows[block_id].cells.emplace_back(
            row_block_id, num_nonzeros_);
        transpose_block_structure_->rows[block_id].nnz += cell_nnz;
      }

      num_nonzeros_ += cell_nnz;
    }
  }

  if (num_nonzeros_ > max_num_nonzeros_) {
    double* old_values = values_;
    values_ = AllocateValues(num_nonzeros_);
    std::copy_n(old_values, old_num_nonzeros, values_);
    max_num_nonzeros_ = num_nonzeros_;
    FreeValues(old_values);
  }

  std::copy(
      m.values(), m.values() + m.num_nonzeros(), values_ + old_num_nonzeros);

  if (transpose_block_structure_ == nullptr) {
    return;
  }
  ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows);
}

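// Removes the trailing delta_row_blocks row blocks; the inverse of
// AppendRows. Transpose cells are popped from the back of each affected
// column, which relies on cells within a transpose row being ordered by
// increasing row-block id.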
void BlockSparseMatrix::DeleteRowBlocks(const int delta_row_blocks) {
  const int num_row_blocks = block_structure_->rows.size();
  const int new_num_row_blocks = num_row_blocks - delta_row_blocks;
  int delta_num_nonzeros = 0;
  int delta_num_rows = 0;
  const std::vector<Block>& column_blocks = block_structure_->cols;
  for (int i = 0; i < delta_row_blocks; ++i) {
    const CompressedRow& row = block_structure_->rows[num_row_blocks - i - 1];
    delta_num_rows += row.block.size;
    for (int c = 0; c < row.cells.size(); ++c) {
      const Cell& cell = row.cells[c];
      delta_num_nonzeros += row.block.size * column_blocks[cell.block_id].size;

      if (transpose_block_structure_) {
        auto& col_cells = transpose_block_structure_->rows[cell.block_id].cells;
        while (!col_cells.empty() &&
               col_cells.back().block_id >= new_num_row_blocks) {
          const int del_block_id = col_cells.back().block_id;
          const int del_block_rows =
              block_structure_->rows[del_block_id].block.size;
          const int del_block_cols = column_blocks[cell.block_id].size;
          const int del_cell_nnz = del_block_rows * del_block_cols;
          transpose_block_structure_->rows[cell.block_id].nnz -= del_cell_nnz;
          col_cells.pop_back();
        }
      }
    }
  }
  num_nonzeros_ -= delta_num_nonzeros;
  num_rows_ -= delta_num_rows;
  block_structure_->rows.resize(new_num_row_blocks);

  if (transpose_block_structure_ == nullptr) {
    return;
  }
  for (int i = 0; i < delta_row_blocks; ++i) {
    transpose_block_structure_->cols.pop_back();
  }

  ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows);
}

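// Creates a random matrix, typically used for testing and benchmarking.
// Block sizes are drawn uniformly from the configured ranges, each cell is
// present with probability block_density, and the values are i.i.d. standard
// normal. The outer loop retries until at least one cell has been generated.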
std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateRandomMatrix(
    const BlockSparseMatrix::RandomMatrixOptions& options,
    std::mt19937& prng,
    bool use_page_locked_memory) {
  CHECK_GT(options.num_row_blocks, 0);
  CHECK_GT(options.min_row_block_size, 0);
  CHECK_GT(options.max_row_block_size, 0);
  CHECK_LE(options.min_row_block_size, options.max_row_block_size);
  CHECK_GT(options.block_density, 0.0);
  CHECK_LE(options.block_density, 1.0);

  std::uniform_int_distribution<int> col_distribution(
      options.min_col_block_size, options.max_col_block_size);
  std::uniform_int_distribution<int> row_distribution(
      options.min_row_block_size, options.max_row_block_size);
  auto bs = std::make_unique<CompressedRowBlockStructure>();
  if (options.col_blocks.empty()) {
    CHECK_GT(options.num_col_blocks, 0);
    CHECK_GT(options.min_col_block_size, 0);
    CHECK_GT(options.max_col_block_size, 0);
    CHECK_LE(options.min_col_block_size, options.max_col_block_size);

    // Generate the col block structure.
    int col_block_position = 0;
    for (int i = 0; i < options.num_col_blocks; ++i) {
      const int col_block_size = col_distribution(prng);
      bs->cols.emplace_back(col_block_size, col_block_position);
      col_block_position += col_block_size;
    }
  } else {
    bs->cols = options.col_blocks;
  }

  bool matrix_has_blocks = false;
  std::uniform_real_distribution<double> uniform01(0.0, 1.0);
  while (!matrix_has_blocks) {
    VLOG(1) << "Clearing";
    bs->rows.clear();
    int row_block_position = 0;
    int value_position = 0;
    for (int r = 0; r < options.num_row_blocks; ++r) {
      const int row_block_size = row_distribution(prng);
      bs->rows.emplace_back();
      CompressedRow& row = bs->rows.back();
      row.block.size = row_block_size;
      row.block.position = row_block_position;
      row_block_position += row_block_size;
      for (int c = 0; c < bs->cols.size(); ++c) {
        if (uniform01(prng) > options.block_density) continue;

        row.cells.emplace_back();
        Cell& cell = row.cells.back();
        cell.block_id = c;
        cell.position = value_position;
        value_position += row_block_size * bs->cols[c].size;
        matrix_has_blocks = true;
      }
    }
  }

  auto matrix = std::make_unique<BlockSparseMatrix>(bs.release(),
                                                    use_page_locked_memory);
  double* values = matrix->mutable_values();
  std::normal_distribution<double> standard_normal_distribution;
  std::generate_n(
      values, matrix->num_nonzeros(), [&standard_normal_distribution, &prng] {
        return standard_normal_distribution(prng);
      });

  return matrix;
}

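// Builds the block structure of the transpose: row blocks become column
// blocks and vice versa. Each transpose cell keeps the value offset of the
// original cell, since the values array is shared with the original matrix
// and cells are transposed on access.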
std::unique_ptr<CompressedRowBlockStructure> CreateTranspose(
    const CompressedRowBlockStructure& bs) {
  auto transpose = std::make_unique<CompressedRowBlockStructure>();

  transpose->rows.resize(bs.cols.size());
  for (int i = 0; i < bs.cols.size(); ++i) {
    transpose->rows[i].block = bs.cols[i];
    transpose->rows[i].nnz = 0;
  }

  transpose->cols.resize(bs.rows.size());
  for (int i = 0; i < bs.rows.size(); ++i) {
    auto& row = bs.rows[i];
    transpose->cols[i] = row.block;

    const int nrows = row.block.size;
    for (auto& cell : row.cells) {
      transpose->rows[cell.block_id].cells.emplace_back(i, cell.position);
      const int ncols = transpose->rows[cell.block_id].block.size;
      transpose->rows[cell.block_id].nnz += nrows * ncols;
    }
  }
  ComputeCumulativeNumberOfNonZeros(transpose->rows);
  return transpose;
}

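// Allocates the values array either on the regular heap or, when page-locked
// memory was requested (and CUDA is available), as pinned host memory via
// cudaHostAlloc, which allows faster host-to-device transfers.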
double* BlockSparseMatrix::AllocateValues(int size) {
  if (!use_page_locked_memory_) {
    return new double[size];
  }

#ifndef CERES_NO_CUDA

  double* values = nullptr;
  CHECK_EQ(cudaSuccess,
           cudaHostAlloc(&values, sizeof(double) * size, cudaHostAllocDefault));
  return values;
#else
  LOG(FATAL) << "Page locked memory requested when CUDA is not available. "
             << "This is a Ceres bug; please contact the developers!";
  return nullptr;
#endif
}

void BlockSparseMatrix::FreeValues(double*& values) {
  if (!use_page_locked_memory_) {
    delete[] values;
    values = nullptr;
    return;
  }

#ifndef CERES_NO_CUDA
  CHECK_EQ(cudaSuccess, cudaFreeHost(values));
  values = nullptr;
#else
  LOG(FATAL) << "Page locked memory requested when CUDA is not available. "
             << "This is a Ceres bug; please contact the developers!";
#endif
}

}  // namespace ceres::internal