blob: bd02439b500cde00927e00403f9335c73ba8c8c1 [file] [log] [blame]
Austin Schuh70cc9552019-01-21 19:46:48 -08001// Ceres Solver - A fast non-linear least squares minimizer
Austin Schuh3de38b02024-06-25 18:25:10 -07002// Copyright 2023 Google Inc. All rights reserved.
Austin Schuh70cc9552019-01-21 19:46:48 -08003// http://ceres-solver.org/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7//
8// * Redistributions of source code must retain the above copyright notice,
9// this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above copyright notice,
11// this list of conditions and the following disclaimer in the documentation
12// and/or other materials provided with the distribution.
13// * Neither the name of Google Inc. nor the names of its contributors may be
14// used to endorse or promote products derived from this software without
15// specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27// POSSIBILITY OF SUCH DAMAGE.
28//
29// Author: sameeragarwal@google.com (Sameer Agarwal)
30
Austin Schuh70cc9552019-01-21 19:46:48 -080031#include <algorithm>
32#include <cstring>
Austin Schuh3de38b02024-06-25 18:25:10 -070033#include <memory>
Austin Schuh70cc9552019-01-21 19:46:48 -080034#include <vector>
Austin Schuh1d1e6ea2020-12-23 21:56:30 -080035
Austin Schuh70cc9552019-01-21 19:46:48 -080036#include "ceres/block_sparse_matrix.h"
37#include "ceres/block_structure.h"
38#include "ceres/internal/eigen.h"
Austin Schuh3de38b02024-06-25 18:25:10 -070039#include "ceres/parallel_for.h"
40#include "ceres/partition_range_for_parallel_for.h"
Austin Schuh1d1e6ea2020-12-23 21:56:30 -080041#include "ceres/partitioned_matrix_view.h"
Austin Schuh70cc9552019-01-21 19:46:48 -080042#include "ceres/small_blas.h"
43#include "glog/logging.h"
44
Austin Schuh3de38b02024-06-25 18:25:10 -070045namespace ceres::internal {
Austin Schuh70cc9552019-01-21 19:46:48 -080046
47template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
48PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
Austin Schuh3de38b02024-06-25 18:25:10 -070049 PartitionedMatrixView(const LinearSolver::Options& options,
50 const BlockSparseMatrix& matrix)
51
52 : options_(options), matrix_(matrix) {
Austin Schuh70cc9552019-01-21 19:46:48 -080053 const CompressedRowBlockStructure* bs = matrix_.block_structure();
54 CHECK(bs != nullptr);
55
Austin Schuh3de38b02024-06-25 18:25:10 -070056 num_col_blocks_e_ = options_.elimination_groups[0];
Austin Schuh70cc9552019-01-21 19:46:48 -080057 num_col_blocks_f_ = bs->cols.size() - num_col_blocks_e_;
58
59 // Compute the number of row blocks in E. The number of row blocks
60 // in E maybe less than the number of row blocks in the input matrix
61 // as some of the row blocks at the bottom may not have any
62 // e_blocks. For a definition of what an e_block is, please see
Austin Schuh3de38b02024-06-25 18:25:10 -070063 // schur_complement_solver.h
Austin Schuh70cc9552019-01-21 19:46:48 -080064 num_row_blocks_e_ = 0;
Austin Schuh3de38b02024-06-25 18:25:10 -070065 for (const auto& row : bs->rows) {
66 const std::vector<Cell>& cells = row.cells;
Austin Schuh70cc9552019-01-21 19:46:48 -080067 if (cells[0].block_id < num_col_blocks_e_) {
68 ++num_row_blocks_e_;
69 }
70 }
71
72 // Compute the number of columns in E and F.
73 num_cols_e_ = 0;
74 num_cols_f_ = 0;
75
76 for (int c = 0; c < bs->cols.size(); ++c) {
77 const Block& block = bs->cols[c];
78 if (c < num_col_blocks_e_) {
79 num_cols_e_ += block.size;
80 } else {
81 num_cols_f_ += block.size;
82 }
83 }
84
85 CHECK_EQ(num_cols_e_ + num_cols_f_, matrix_.num_cols());
Austin Schuh70cc9552019-01-21 19:46:48 -080086
Austin Schuh3de38b02024-06-25 18:25:10 -070087 auto transpose_bs = matrix_.transpose_block_structure();
88 const int num_threads = options_.num_threads;
89 if (transpose_bs != nullptr && num_threads > 1) {
90 int kMaxPartitions = num_threads * 4;
91 e_cols_partition_ = PartitionRangeForParallelFor(
92 0,
93 num_col_blocks_e_,
94 kMaxPartitions,
95 transpose_bs->rows.data(),
96 [](const CompressedRow& row) { return row.cumulative_nnz; });
97
98 f_cols_partition_ = PartitionRangeForParallelFor(
99 num_col_blocks_e_,
100 num_col_blocks_e_ + num_col_blocks_f_,
101 kMaxPartitions,
102 transpose_bs->rows.data(),
103 [](const CompressedRow& row) { return row.cumulative_nnz; });
104 }
105}
Austin Schuh70cc9552019-01-21 19:46:48 -0800106
107// The next four methods don't seem to be particularly cache
108// friendly. This is an artifact of how the BlockStructure of the
109// input matrix is constructed. These methods will benefit from
110// multithreading as well as improved data layout.
111
112template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800113void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
Austin Schuh3de38b02024-06-25 18:25:10 -0700114 RightMultiplyAndAccumulateE(const double* x, double* y) const {
Austin Schuh70cc9552019-01-21 19:46:48 -0800115 // Iterate over the first num_row_blocks_e_ row blocks, and multiply
116 // by the first cell in each row block.
Austin Schuh3de38b02024-06-25 18:25:10 -0700117 auto bs = matrix_.block_structure();
Austin Schuh70cc9552019-01-21 19:46:48 -0800118 const double* values = matrix_.values();
Austin Schuh3de38b02024-06-25 18:25:10 -0700119 ParallelFor(options_.context,
120 0,
121 num_row_blocks_e_,
122 options_.num_threads,
123 [values, bs, x, y](int row_block_id) {
124 const Cell& cell = bs->rows[row_block_id].cells[0];
125 const int row_block_pos = bs->rows[row_block_id].block.position;
126 const int row_block_size = bs->rows[row_block_id].block.size;
127 const int col_block_id = cell.block_id;
128 const int col_block_pos = bs->cols[col_block_id].position;
129 const int col_block_size = bs->cols[col_block_id].size;
130 // clang-format off
131 MatrixVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
132 values + cell.position, row_block_size, col_block_size,
133 x + col_block_pos,
134 y + row_block_pos);
135 // clang-format on
136 });
Austin Schuh70cc9552019-01-21 19:46:48 -0800137}
138
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    RightMultiplyAndAccumulateF(const double* x, double* y) const {
  // y += F * x, where x is indexed in F's column space (i.e. shifted
  // left by num_cols_e).
  //
  // Iterate over row blocks, and if the row block is in E, then
  // multiply by all the cells except the first one which is of type
  // E. If the row block is not in E (i.e its in the bottom
  // num_row_blocks - num_row_blocks_e row blocks), then all the cells
  // are of type F and multiply by them all.
  const CompressedRowBlockStructure* bs = matrix_.block_structure();
  const int num_row_blocks = bs->rows.size();
  // Local copy so the lambdas below do not capture `this`.
  const int num_cols_e = num_cols_e_;
  const double* values = matrix_.values();
  // Row blocks with an e_block: skip cell 0 (the e_block); the remaining
  // cells have the compile-time F block size.
  ParallelFor(options_.context,
              0,
              num_row_blocks_e_,
              options_.num_threads,
              [values, bs, num_cols_e, x, y](int row_block_id) {
                const int row_block_pos = bs->rows[row_block_id].block.position;
                const int row_block_size = bs->rows[row_block_id].block.size;
                const auto& cells = bs->rows[row_block_id].cells;
                for (int c = 1; c < cells.size(); ++c) {
                  const int col_block_id = cells[c].block_id;
                  const int col_block_pos = bs->cols[col_block_id].position;
                  const int col_block_size = bs->cols[col_block_id].size;
                  // clang-format off
                  MatrixVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
                      values + cells[c].position, row_block_size, col_block_size,
                      x + col_block_pos - num_cols_e,
                      y + row_block_pos);
                  // clang-format on
                }
              });
  // Row blocks without e_blocks: every cell is an f_block, but the block
  // sizes are not known at compile time, hence Eigen::Dynamic.
  ParallelFor(options_.context,
              num_row_blocks_e_,
              num_row_blocks,
              options_.num_threads,
              [values, bs, num_cols_e, x, y](int row_block_id) {
                const int row_block_pos = bs->rows[row_block_id].block.position;
                const int row_block_size = bs->rows[row_block_id].block.size;
                const auto& cells = bs->rows[row_block_id].cells;
                for (const auto& cell : cells) {
                  const int col_block_id = cell.block_id;
                  const int col_block_pos = bs->cols[col_block_id].position;
                  const int col_block_size = bs->cols[col_block_id].size;
                  // clang-format off
                  MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
                      values + cell.position, row_block_size, col_block_size,
                      x + col_block_pos - num_cols_e,
                      y + row_block_pos);
                  // clang-format on
                }
              });
}
Austin Schuh70cc9552019-01-21 19:46:48 -0800192
Austin Schuh3de38b02024-06-25 18:25:10 -0700193template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
194void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
195 LeftMultiplyAndAccumulateE(const double* x, double* y) const {
196 if (!num_col_blocks_e_) return;
197 if (!num_row_blocks_e_) return;
198 if (options_.num_threads == 1) {
199 LeftMultiplyAndAccumulateESingleThreaded(x, y);
200 } else {
201 CHECK(options_.context != nullptr);
202 LeftMultiplyAndAccumulateEMultiThreaded(x, y);
Austin Schuh70cc9552019-01-21 19:46:48 -0800203 }
204}
205
206template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800207void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
Austin Schuh3de38b02024-06-25 18:25:10 -0700208 LeftMultiplyAndAccumulateESingleThreaded(const double* x, double* y) const {
Austin Schuh70cc9552019-01-21 19:46:48 -0800209 const CompressedRowBlockStructure* bs = matrix_.block_structure();
210
211 // Iterate over the first num_row_blocks_e_ row blocks, and multiply
212 // by the first cell in each row block.
213 const double* values = matrix_.values();
214 for (int r = 0; r < num_row_blocks_e_; ++r) {
215 const Cell& cell = bs->rows[r].cells[0];
216 const int row_block_pos = bs->rows[r].block.position;
217 const int row_block_size = bs->rows[r].block.size;
218 const int col_block_id = cell.block_id;
219 const int col_block_pos = bs->cols[col_block_id].position;
220 const int col_block_size = bs->cols[col_block_id].size;
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800221 // clang-format off
Austin Schuh70cc9552019-01-21 19:46:48 -0800222 MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
223 values + cell.position, row_block_size, col_block_size,
224 x + row_block_pos,
225 y + col_block_pos);
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800226 // clang-format on
Austin Schuh70cc9552019-01-21 19:46:48 -0800227 }
228}
229
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    LeftMultiplyAndAccumulateEMultiThreaded(const double* x, double* y) const {
  // y += E' * x, parallelized over the column blocks of E using the
  // transposed block structure (a row of transpose_bs corresponds to a
  // column of the original matrix), so each output range is written by
  // exactly one worker and no synchronization is needed.
  auto transpose_bs = matrix_.transpose_block_structure();
  CHECK(transpose_bs != nullptr);

  // Local copies of class members in order to avoid capturing pointer to the
  // whole object in lambda function
  auto values = matrix_.values();
  const int num_row_blocks_e = num_row_blocks_e_;
  ParallelFor(
      options_.context,
      0,
      num_col_blocks_e_,
      options_.num_threads,
      [values, transpose_bs, num_row_blocks_e, x, y](int row_block_id) {
        // row_block_id indexes a column block of the original matrix.
        int row_block_pos = transpose_bs->rows[row_block_id].block.position;
        int row_block_size = transpose_bs->rows[row_block_id].block.size;
        auto& cells = transpose_bs->rows[row_block_id].cells;

        for (auto& cell : cells) {
          const int col_block_id = cell.block_id;
          const int col_block_size = transpose_bs->cols[col_block_id].size;
          const int col_block_pos = transpose_bs->cols[col_block_id].position;
          // Only row blocks below num_row_blocks_e contain e_blocks; the
          // early break relies on cells being ordered by block_id
          // (presumably guaranteed by the transpose structure — the same
          // pattern is used by the F variant below).
          if (col_block_id >= num_row_blocks_e) break;
          MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
              values + cell.position,
              col_block_size,
              row_block_size,
              x + col_block_pos,
              y + row_block_pos);
        }
      },
      e_cols_partition());
}
265
266template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
267void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
268 LeftMultiplyAndAccumulateF(const double* x, double* y) const {
269 if (!num_col_blocks_f_) return;
270 if (options_.num_threads == 1) {
271 LeftMultiplyAndAccumulateFSingleThreaded(x, y);
272 } else {
273 CHECK(options_.context != nullptr);
274 LeftMultiplyAndAccumulateFMultiThreaded(x, y);
275 }
276}
277
278template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
279void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
280 LeftMultiplyAndAccumulateFSingleThreaded(const double* x, double* y) const {
Austin Schuh70cc9552019-01-21 19:46:48 -0800281 const CompressedRowBlockStructure* bs = matrix_.block_structure();
282
283 // Iterate over row blocks, and if the row block is in E, then
284 // multiply by all the cells except the first one which is of type
285 // E. If the row block is not in E (i.e its in the bottom
286 // num_row_blocks - num_row_blocks_e row blocks), then all the cells
287 // are of type F and multiply by them all.
288 const double* values = matrix_.values();
289 for (int r = 0; r < num_row_blocks_e_; ++r) {
290 const int row_block_pos = bs->rows[r].block.position;
291 const int row_block_size = bs->rows[r].block.size;
292 const std::vector<Cell>& cells = bs->rows[r].cells;
293 for (int c = 1; c < cells.size(); ++c) {
294 const int col_block_id = cells[c].block_id;
295 const int col_block_pos = bs->cols[col_block_id].position;
296 const int col_block_size = bs->cols[col_block_id].size;
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800297 // clang-format off
Austin Schuh70cc9552019-01-21 19:46:48 -0800298 MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
299 values + cells[c].position, row_block_size, col_block_size,
300 x + row_block_pos,
301 y + col_block_pos - num_cols_e_);
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800302 // clang-format on
Austin Schuh70cc9552019-01-21 19:46:48 -0800303 }
304 }
305
306 for (int r = num_row_blocks_e_; r < bs->rows.size(); ++r) {
307 const int row_block_pos = bs->rows[r].block.position;
308 const int row_block_size = bs->rows[r].block.size;
309 const std::vector<Cell>& cells = bs->rows[r].cells;
Austin Schuh3de38b02024-06-25 18:25:10 -0700310 for (const auto& cell : cells) {
311 const int col_block_id = cell.block_id;
Austin Schuh70cc9552019-01-21 19:46:48 -0800312 const int col_block_pos = bs->cols[col_block_id].position;
313 const int col_block_size = bs->cols[col_block_id].size;
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800314 // clang-format off
Austin Schuh70cc9552019-01-21 19:46:48 -0800315 MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
Austin Schuh3de38b02024-06-25 18:25:10 -0700316 values + cell.position, row_block_size, col_block_size,
Austin Schuh70cc9552019-01-21 19:46:48 -0800317 x + row_block_pos,
318 y + col_block_pos - num_cols_e_);
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800319 // clang-format on
Austin Schuh70cc9552019-01-21 19:46:48 -0800320 }
321 }
322}
323
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    LeftMultiplyAndAccumulateFMultiThreaded(const double* x, double* y) const {
  // y += F' * x, parallelized over the column blocks of F via the
  // transposed block structure; y is indexed in F's column space
  // (shifted left by num_cols_e). Each worker owns one output range, so
  // no synchronization is needed.
  auto transpose_bs = matrix_.transpose_block_structure();
  CHECK(transpose_bs != nullptr);
  // Local copies of class members in order to avoid capturing pointer to the
  // whole object in lambda function
  auto values = matrix_.values();
  const int num_row_blocks_e = num_row_blocks_e_;
  const int num_cols_e = num_cols_e_;
  ParallelFor(
      options_.context,
      num_col_blocks_e_,
      num_col_blocks_e_ + num_col_blocks_f_,
      options_.num_threads,
      [values, transpose_bs, num_row_blocks_e, num_cols_e, x, y](
          int row_block_id) {
        // row_block_id indexes a column block of the original matrix.
        int row_block_pos = transpose_bs->rows[row_block_id].block.position;
        int row_block_size = transpose_bs->rows[row_block_id].block.size;
        auto& cells = transpose_bs->rows[row_block_id].cells;

        const int num_cells = cells.size();
        int cell_idx = 0;
        // First phase: cells whose row block also contains an e_block
        // (block_id < num_row_blocks_e); these use the compile-time F
        // block sizes.
        for (; cell_idx < num_cells; ++cell_idx) {
          auto& cell = cells[cell_idx];
          const int col_block_id = cell.block_id;
          const int col_block_size = transpose_bs->cols[col_block_id].size;
          const int col_block_pos = transpose_bs->cols[col_block_id].position;
          if (col_block_id >= num_row_blocks_e) break;

          MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
              values + cell.position,
              col_block_size,
              row_block_size,
              x + col_block_pos,
              y + row_block_pos - num_cols_e);
        }
        // Second phase: the remaining cells live in row blocks without
        // e_blocks; their sizes are only known at run time.
        for (; cell_idx < num_cells; ++cell_idx) {
          auto& cell = cells[cell_idx];
          const int col_block_id = cell.block_id;
          const int col_block_size = transpose_bs->cols[col_block_id].size;
          const int col_block_pos = transpose_bs->cols[col_block_id].position;
          MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
              values + cell.position,
              col_block_size,
              row_block_size,
              x + col_block_pos,
              y + row_block_pos - num_cols_e);
        }
      },
      f_cols_partition());
}
376
Austin Schuh70cc9552019-01-21 19:46:48 -0800377// Given a range of columns blocks of a matrix m, compute the block
378// structure of the block diagonal of the matrix m(:,
379// start_col_block:end_col_block)'m(:, start_col_block:end_col_block)
Austin Schuh3de38b02024-06-25 18:25:10 -0700380// and return a BlockSparseMatrix with this block structure. The
Austin Schuh70cc9552019-01-21 19:46:48 -0800381// caller owns the result.
382template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
Austin Schuh3de38b02024-06-25 18:25:10 -0700383std::unique_ptr<BlockSparseMatrix>
Austin Schuh70cc9552019-01-21 19:46:48 -0800384PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800385 CreateBlockDiagonalMatrixLayout(int start_col_block,
386 int end_col_block) const {
Austin Schuh70cc9552019-01-21 19:46:48 -0800387 const CompressedRowBlockStructure* bs = matrix_.block_structure();
Austin Schuh3de38b02024-06-25 18:25:10 -0700388 auto* block_diagonal_structure = new CompressedRowBlockStructure;
Austin Schuh70cc9552019-01-21 19:46:48 -0800389
390 int block_position = 0;
391 int diagonal_cell_position = 0;
392
393 // Iterate over the column blocks, creating a new diagonal block for
394 // each column block.
395 for (int c = start_col_block; c < end_col_block; ++c) {
396 const Block& block = bs->cols[c];
Austin Schuh3de38b02024-06-25 18:25:10 -0700397 block_diagonal_structure->cols.emplace_back();
Austin Schuh70cc9552019-01-21 19:46:48 -0800398 Block& diagonal_block = block_diagonal_structure->cols.back();
399 diagonal_block.size = block.size;
400 diagonal_block.position = block_position;
401
Austin Schuh3de38b02024-06-25 18:25:10 -0700402 block_diagonal_structure->rows.emplace_back();
Austin Schuh70cc9552019-01-21 19:46:48 -0800403 CompressedRow& row = block_diagonal_structure->rows.back();
404 row.block = diagonal_block;
405
Austin Schuh3de38b02024-06-25 18:25:10 -0700406 row.cells.emplace_back();
Austin Schuh70cc9552019-01-21 19:46:48 -0800407 Cell& cell = row.cells.back();
408 cell.block_id = c - start_col_block;
409 cell.position = diagonal_cell_position;
410
411 block_position += block.size;
412 diagonal_cell_position += block.size * block.size;
413 }
414
415 // Build a BlockSparseMatrix with the just computed block
416 // structure.
Austin Schuh3de38b02024-06-25 18:25:10 -0700417 return std::make_unique<BlockSparseMatrix>(block_diagonal_structure);
Austin Schuh70cc9552019-01-21 19:46:48 -0800418}
419
420template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
Austin Schuh3de38b02024-06-25 18:25:10 -0700421std::unique_ptr<BlockSparseMatrix>
422PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
423 CreateBlockDiagonalEtE() const {
424 std::unique_ptr<BlockSparseMatrix> block_diagonal =
Austin Schuh70cc9552019-01-21 19:46:48 -0800425 CreateBlockDiagonalMatrixLayout(0, num_col_blocks_e_);
Austin Schuh3de38b02024-06-25 18:25:10 -0700426 UpdateBlockDiagonalEtE(block_diagonal.get());
Austin Schuh70cc9552019-01-21 19:46:48 -0800427 return block_diagonal;
428}
429
430template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
Austin Schuh3de38b02024-06-25 18:25:10 -0700431std::unique_ptr<BlockSparseMatrix>
432PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
433 CreateBlockDiagonalFtF() const {
434 std::unique_ptr<BlockSparseMatrix> block_diagonal =
435 CreateBlockDiagonalMatrixLayout(num_col_blocks_e_,
436 num_col_blocks_e_ + num_col_blocks_f_);
437 UpdateBlockDiagonalFtF(block_diagonal.get());
Austin Schuh70cc9552019-01-21 19:46:48 -0800438 return block_diagonal;
439}
440
Austin Schuh3de38b02024-06-25 18:25:10 -0700441// Similar to the code in RightMultiplyAndAccumulateE, except instead of the
442// matrix vector multiply its an outer product.
Austin Schuh70cc9552019-01-21 19:46:48 -0800443//
444// block_diagonal = block_diagonal(E'E)
445//
446template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800447void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
Austin Schuh3de38b02024-06-25 18:25:10 -0700448 UpdateBlockDiagonalEtESingleThreaded(
449 BlockSparseMatrix* block_diagonal) const {
450 auto bs = matrix_.block_structure();
451 auto block_diagonal_structure = block_diagonal->block_structure();
Austin Schuh70cc9552019-01-21 19:46:48 -0800452
453 block_diagonal->SetZero();
454 const double* values = matrix_.values();
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800455 for (int r = 0; r < num_row_blocks_e_; ++r) {
Austin Schuh70cc9552019-01-21 19:46:48 -0800456 const Cell& cell = bs->rows[r].cells[0];
457 const int row_block_size = bs->rows[r].block.size;
458 const int block_id = cell.block_id;
459 const int col_block_size = bs->cols[block_id].size;
460 const int cell_position =
461 block_diagonal_structure->rows[block_id].cells[0].position;
462
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800463 // clang-format off
Austin Schuh70cc9552019-01-21 19:46:48 -0800464 MatrixTransposeMatrixMultiply
465 <kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
466 values + cell.position, row_block_size, col_block_size,
467 values + cell.position, row_block_size, col_block_size,
468 block_diagonal->mutable_values() + cell_position,
469 0, 0, col_block_size, col_block_size);
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800470 // clang-format on
Austin Schuh70cc9552019-01-21 19:46:48 -0800471 }
472}
473
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    UpdateBlockDiagonalEtEMultiThreaded(
        BlockSparseMatrix* block_diagonal) const {
  // block_diagonal = block_diagonal(E'E), parallelized over the column
  // blocks of E; each worker writes exactly one diagonal block, so no
  // synchronization is needed.
  auto transpose_block_structure = matrix_.transpose_block_structure();
  CHECK(transpose_block_structure != nullptr);
  auto block_diagonal_structure = block_diagonal->block_structure();

  const double* values = matrix_.values();
  double* values_diagonal = block_diagonal->mutable_values();
  ParallelFor(
      options_.context,
      0,
      num_col_blocks_e_,
      options_.num_threads,
      [values,
       transpose_block_structure,
       values_diagonal,
       block_diagonal_structure](int col_block_id) {
        // Locate this column block's diagonal cell in the output matrix.
        int cell_position =
            block_diagonal_structure->rows[col_block_id].cells[0].position;
        double* cell_values = values_diagonal + cell_position;
        int col_block_size =
            transpose_block_structure->rows[col_block_id].block.size;
        auto& cells = transpose_block_structure->rows[col_block_id].cells;
        // Zero only this worker's block; there is no global SetZero in
        // the multi-threaded path.
        MatrixRef(cell_values, col_block_size, col_block_size).setZero();

        // Accumulate cell' * cell over every cell in this column block.
        for (auto& c : cells) {
          int row_block_size = transpose_block_structure->cols[c.block_id].size;
          // clang-format off
          MatrixTransposeMatrixMultiply<kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
              values + c.position, row_block_size, col_block_size,
              values + c.position, row_block_size, col_block_size,
              cell_values, 0, 0, col_block_size, col_block_size);
          // clang-format on
        }
      },
      e_cols_partition_);
}
513
514template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
515void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
516 UpdateBlockDiagonalEtE(BlockSparseMatrix* block_diagonal) const {
517 if (options_.num_threads == 1) {
518 UpdateBlockDiagonalEtESingleThreaded(block_diagonal);
519 } else {
520 CHECK(options_.context != nullptr);
521 UpdateBlockDiagonalEtEMultiThreaded(block_diagonal);
522 }
523}
524
// Similar to the code in RightMultiplyAndAccumulateF, except instead of the
// matrix vector multiply its an outer product.
//
// block_diagonal = block_diagonal(F'F)
//
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    UpdateBlockDiagonalFtFSingleThreaded(
        BlockSparseMatrix* block_diagonal) const {
  auto bs = matrix_.block_structure();
  auto block_diagonal_structure = block_diagonal->block_structure();

  block_diagonal->SetZero();
  const double* values = matrix_.values();
  // Row blocks with an e_block: skip cell 0 (it belongs to E); the
  // remaining cells are f_blocks with compile-time block sizes.
  for (int r = 0; r < num_row_blocks_e_; ++r) {
    const int row_block_size = bs->rows[r].block.size;
    const std::vector<Cell>& cells = bs->rows[r].cells;
    for (int c = 1; c < cells.size(); ++c) {
      const int col_block_id = cells[c].block_id;
      const int col_block_size = bs->cols[col_block_id].size;
      // Diagonal blocks are indexed relative to the start of F.
      const int diagonal_block_id = col_block_id - num_col_blocks_e_;
      const int cell_position =
          block_diagonal_structure->rows[diagonal_block_id].cells[0].position;

      // clang-format off
      MatrixTransposeMatrixMultiply
          <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
          values + cells[c].position, row_block_size, col_block_size,
          values + cells[c].position, row_block_size, col_block_size,
          block_diagonal->mutable_values() + cell_position,
          0, 0, col_block_size, col_block_size);
      // clang-format on
    }
  }

  // Row blocks without e_blocks: every cell is an f_block whose size is
  // only known at run time, hence Eigen::Dynamic.
  for (int r = num_row_blocks_e_; r < bs->rows.size(); ++r) {
    const int row_block_size = bs->rows[r].block.size;
    const std::vector<Cell>& cells = bs->rows[r].cells;
    for (const auto& cell : cells) {
      const int col_block_id = cell.block_id;
      const int col_block_size = bs->cols[col_block_id].size;
      const int diagonal_block_id = col_block_id - num_col_blocks_e_;
      const int cell_position =
          block_diagonal_structure->rows[diagonal_block_id].cells[0].position;

      // clang-format off
      MatrixTransposeMatrixMultiply
          <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
          values + cell.position, row_block_size, col_block_size,
          values + cell.position, row_block_size, col_block_size,
          block_diagonal->mutable_values() + cell_position,
          0, 0, col_block_size, col_block_size);
      // clang-format on
    }
  }
}
581
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
    UpdateBlockDiagonalFtFMultiThreaded(
        BlockSparseMatrix* block_diagonal) const {
  // block_diagonal = block_diagonal(F'F), parallelized over the column
  // blocks of F; each worker writes exactly one diagonal block, so no
  // synchronization is needed.
  auto transpose_block_structure = matrix_.transpose_block_structure();
  CHECK(transpose_block_structure != nullptr);
  auto block_diagonal_structure = block_diagonal->block_structure();

  const double* values = matrix_.values();
  double* values_diagonal = block_diagonal->mutable_values();

  // Local copies of class members in order to avoid capturing pointer to
  // the whole object in the lambda function.
  const int num_col_blocks_e = num_col_blocks_e_;
  const int num_row_blocks_e = num_row_blocks_e_;
  ParallelFor(
      options_.context,
      num_col_blocks_e_,
      num_col_blocks_e + num_col_blocks_f_,
      options_.num_threads,
      [transpose_block_structure,
       block_diagonal_structure,
       num_col_blocks_e,
       num_row_blocks_e,
       values,
       values_diagonal](int col_block_id) {
        const int col_block_size =
            transpose_block_structure->rows[col_block_id].block.size;
        // Diagonal blocks are indexed relative to the start of F.
        const int diagonal_block_id = col_block_id - num_col_blocks_e;
        const int cell_position =
            block_diagonal_structure->rows[diagonal_block_id].cells[0].position;
        double* cell_values = values_diagonal + cell_position;

        // Zero only this worker's block; there is no global SetZero in
        // the multi-threaded path.
        MatrixRef(cell_values, col_block_size, col_block_size).setZero();

        auto& cells = transpose_block_structure->rows[col_block_id].cells;
        const int num_cells = cells.size();
        int i = 0;
        // First phase: cells whose row block also contains an e_block
        // (block_id < num_row_blocks_e); compile-time block sizes.
        for (; i < num_cells; ++i) {
          auto& cell = cells[i];
          const int row_block_id = cell.block_id;
          if (row_block_id >= num_row_blocks_e) break;
          const int row_block_size =
              transpose_block_structure->cols[row_block_id].size;
          // clang-format off
          MatrixTransposeMatrixMultiply
              <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
              values + cell.position, row_block_size, col_block_size,
              values + cell.position, row_block_size, col_block_size,
              cell_values, 0, 0, col_block_size, col_block_size);
          // clang-format on
        }
        // Second phase: remaining cells live in row blocks without
        // e_blocks; their sizes are only known at run time.
        for (; i < num_cells; ++i) {
          auto& cell = cells[i];
          const int row_block_id = cell.block_id;
          const int row_block_size =
              transpose_block_structure->cols[row_block_id].size;
          // clang-format off
          MatrixTransposeMatrixMultiply
              <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
              values + cell.position, row_block_size, col_block_size,
              values + cell.position, row_block_size, col_block_size,
              cell_values, 0, 0, col_block_size, col_block_size);
          // clang-format on
        }
      },
      f_cols_partition_);
}
648
649template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
650void PartitionedMatrixView<kRowBlockSize, kEBlockSize, kFBlockSize>::
651 UpdateBlockDiagonalFtF(BlockSparseMatrix* block_diagonal) const {
652 if (options_.num_threads == 1) {
653 UpdateBlockDiagonalFtFSingleThreaded(block_diagonal);
654 } else {
655 CHECK(options_.context != nullptr);
656 UpdateBlockDiagonalFtFMultiThreaded(block_diagonal);
657 }
658}
659
660} // namespace ceres::internal