blob: 0baadc08c39a045fdba01bb99775119a2eed12a4 [file] [log] [blame]
Austin Schuh70cc9552019-01-21 19:46:48 -08001// Ceres Solver - A fast non-linear least squares minimizer
Austin Schuh3de38b02024-06-25 18:25:10 -07002// Copyright 2023 Google Inc. All rights reserved.
Austin Schuh70cc9552019-01-21 19:46:48 -08003// http://ceres-solver.org/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7//
8// * Redistributions of source code must retain the above copyright notice,
9// this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above copyright notice,
11// this list of conditions and the following disclaimer in the documentation
12// and/or other materials provided with the distribution.
13// * Neither the name of Google Inc. nor the names of its contributors may be
14// used to endorse or promote products derived from this software without
15// specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27// POSSIBILITY OF SUCH DAMAGE.
28//
29// Author: alexs.mac@gmail.com (Alex Stewart)
30
31// This include must come before any #ifndef check on Ceres compile options.
Austin Schuh3de38b02024-06-25 18:25:10 -070032#include "ceres/internal/config.h"
Austin Schuh70cc9552019-01-21 19:46:48 -080033
34#ifndef CERES_NO_ACCELERATE_SPARSE
35
Austin Schuh70cc9552019-01-21 19:46:48 -080036#include <algorithm>
Austin Schuh3de38b02024-06-25 18:25:10 -070037#include <memory>
Austin Schuh70cc9552019-01-21 19:46:48 -080038#include <string>
39#include <vector>
40
Austin Schuh1d1e6ea2020-12-23 21:56:30 -080041#include "ceres/accelerate_sparse.h"
Austin Schuh70cc9552019-01-21 19:46:48 -080042#include "ceres/compressed_col_sparse_matrix_utils.h"
43#include "ceres/compressed_row_sparse_matrix.h"
44#include "ceres/triplet_sparse_matrix.h"
45#include "glog/logging.h"
46
Austin Schuh1d1e6ea2020-12-23 21:56:30 -080047#define CASESTR(x) \
48 case x: \
49 return #x
Austin Schuh70cc9552019-01-21 19:46:48 -080050
51namespace ceres {
52namespace internal {
53
Austin Schuh1d1e6ea2020-12-23 21:56:30 -080054namespace {
Austin Schuh70cc9552019-01-21 19:46:48 -080055const char* SparseStatusToString(SparseStatus_t status) {
56 switch (status) {
57 CASESTR(SparseStatusOK);
58 CASESTR(SparseFactorizationFailed);
59 CASESTR(SparseMatrixIsSingular);
60 CASESTR(SparseInternalError);
61 CASESTR(SparseParameterError);
62 CASESTR(SparseStatusReleased);
63 default:
Austin Schuh3de38b02024-06-25 18:25:10 -070064 return "UNKNOWN";
Austin Schuh70cc9552019-01-21 19:46:48 -080065 }
66}
Austin Schuh1d1e6ea2020-12-23 21:56:30 -080067} // namespace.
Austin Schuh70cc9552019-01-21 19:46:48 -080068
Austin Schuh1d1e6ea2020-12-23 21:56:30 -080069// Resizes workspace as required to contain at least required_size bytes
70// aligned to kAccelerateRequiredAlignment and returns a pointer to the
71// aligned start.
72void* ResizeForAccelerateAlignment(const size_t required_size,
73 std::vector<uint8_t>* workspace) {
74 // As per the Accelerate documentation, all workspace memory passed to the
75 // sparse solver functions must be 16-byte aligned.
76 constexpr int kAccelerateRequiredAlignment = 16;
77 // Although malloc() on macOS should always be 16-byte aligned, it is unclear
78 // if this holds for new(), or on other Apple OSs (phoneOS, watchOS etc).
79 // As such we assume it is not and use std::align() to create a (potentially
80 // offset) 16-byte aligned sub-buffer of the specified size within workspace.
81 workspace->resize(required_size + kAccelerateRequiredAlignment);
82 size_t size_from_aligned_start = workspace->size();
83 void* aligned_solve_workspace_start =
84 reinterpret_cast<void*>(workspace->data());
85 aligned_solve_workspace_start = std::align(kAccelerateRequiredAlignment,
86 required_size,
87 aligned_solve_workspace_start,
88 size_from_aligned_start);
89 CHECK(aligned_solve_workspace_start != nullptr)
90 << "required_size: " << required_size
91 << ", workspace size: " << workspace->size();
92 return aligned_solve_workspace_start;
Austin Schuh70cc9552019-01-21 19:46:48 -080093}
94
Austin Schuh1d1e6ea2020-12-23 21:56:30 -080095template <typename Scalar>
96void AccelerateSparse<Scalar>::Solve(NumericFactorization* numeric_factor,
97 DenseVector* rhs_and_solution) {
98 // From SparseSolve() documentation in Solve.h
99 const int required_size = numeric_factor->solveWorkspaceRequiredStatic +
100 numeric_factor->solveWorkspaceRequiredPerRHS;
101 SparseSolve(*numeric_factor,
102 *rhs_and_solution,
103 ResizeForAccelerateAlignment(required_size, &solve_workspace_));
104}
105
106template <typename Scalar>
Austin Schuh70cc9552019-01-21 19:46:48 -0800107typename AccelerateSparse<Scalar>::ASSparseMatrix
108AccelerateSparse<Scalar>::CreateSparseMatrixTransposeView(
109 CompressedRowSparseMatrix* A) {
110 // Accelerate uses CSC as its sparse storage format whereas Ceres uses CSR.
111 // As this method returns the transpose view we can flip rows/cols to map
112 // from CSR to CSC^T.
113 //
114 // Accelerate's columnStarts is a long*, not an int*. These types might be
115 // different (e.g. ARM on iOS) so always make a copy.
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800116 column_starts_.resize(A->num_rows() + 1); // +1 for final column length.
Austin Schuh3de38b02024-06-25 18:25:10 -0700117 std::copy_n(A->rows(), column_starts_.size(), column_starts_.data());
Austin Schuh70cc9552019-01-21 19:46:48 -0800118
119 ASSparseMatrix At;
120 At.structure.rowCount = A->num_cols();
121 At.structure.columnCount = A->num_rows();
Austin Schuh3de38b02024-06-25 18:25:10 -0700122 At.structure.columnStarts = column_starts_.data();
Austin Schuh70cc9552019-01-21 19:46:48 -0800123 At.structure.rowIndices = A->mutable_cols();
124 At.structure.attributes.transpose = false;
125 At.structure.attributes.triangle = SparseUpperTriangle;
126 At.structure.attributes.kind = SparseSymmetric;
127 At.structure.attributes._reserved = 0;
128 At.structure.attributes._allocatedBySparse = 0;
129 At.structure.blockSize = 1;
Austin Schuh3de38b02024-06-25 18:25:10 -0700130 if constexpr (std::is_same_v<Scalar, double>) {
131 At.data = A->mutable_values();
Austin Schuh70cc9552019-01-21 19:46:48 -0800132 } else {
133 values_ =
134 ConstVectorRef(A->values(), A->num_nonzeros()).template cast<Scalar>();
135 At.data = values_.data();
136 }
137 return At;
138}
139
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800140template <typename Scalar>
Austin Schuh70cc9552019-01-21 19:46:48 -0800141typename AccelerateSparse<Scalar>::SymbolicFactorization
Austin Schuh3de38b02024-06-25 18:25:10 -0700142AccelerateSparse<Scalar>::AnalyzeCholesky(OrderingType ordering_type,
143 ASSparseMatrix* A) {
144 SparseSymbolicFactorOptions sfoption;
145 sfoption.control = SparseDefaultControl;
146 sfoption.orderMethod = SparseOrderDefault;
147 sfoption.order = nullptr;
148 sfoption.ignoreRowsAndColumns = nullptr;
149 sfoption.malloc = malloc;
150 sfoption.free = free;
151 sfoption.reportError = nullptr;
152
153 if (ordering_type == OrderingType::AMD) {
154 sfoption.orderMethod = SparseOrderAMD;
155 } else if (ordering_type == OrderingType::NESDIS) {
156 sfoption.orderMethod = SparseOrderMetis;
157 }
158 return SparseFactor(SparseFactorizationCholesky, A->structure, sfoption);
Austin Schuh70cc9552019-01-21 19:46:48 -0800159}
160
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800161template <typename Scalar>
Austin Schuh70cc9552019-01-21 19:46:48 -0800162typename AccelerateSparse<Scalar>::NumericFactorization
163AccelerateSparse<Scalar>::Cholesky(ASSparseMatrix* A,
164 SymbolicFactorization* symbolic_factor) {
165 return SparseFactor(*symbolic_factor, *A);
166}
167
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800168template <typename Scalar>
Austin Schuh70cc9552019-01-21 19:46:48 -0800169void AccelerateSparse<Scalar>::Cholesky(ASSparseMatrix* A,
170 NumericFactorization* numeric_factor) {
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800171 // From SparseRefactor() documentation in Solve.h
172 const int required_size =
173 std::is_same<Scalar, double>::value
174 ? numeric_factor->symbolicFactorization.workspaceSize_Double
175 : numeric_factor->symbolicFactorization.workspaceSize_Float;
176 return SparseRefactor(
177 *A,
178 numeric_factor,
179 ResizeForAccelerateAlignment(required_size, &factorization_workspace_));
Austin Schuh70cc9552019-01-21 19:46:48 -0800180}
181
182// Instantiate only for the specific template types required/supported s/t the
183// definition can be in the .cc file.
184template class AccelerateSparse<double>;
185template class AccelerateSparse<float>;
186
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800187template <typename Scalar>
188std::unique_ptr<SparseCholesky> AppleAccelerateCholesky<Scalar>::Create(
189 OrderingType ordering_type) {
Austin Schuh70cc9552019-01-21 19:46:48 -0800190 return std::unique_ptr<SparseCholesky>(
191 new AppleAccelerateCholesky<Scalar>(ordering_type));
192}
193
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800194template <typename Scalar>
Austin Schuh70cc9552019-01-21 19:46:48 -0800195AppleAccelerateCholesky<Scalar>::AppleAccelerateCholesky(
196 const OrderingType ordering_type)
197 : ordering_type_(ordering_type) {}
198
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800199template <typename Scalar>
Austin Schuh70cc9552019-01-21 19:46:48 -0800200AppleAccelerateCholesky<Scalar>::~AppleAccelerateCholesky() {
201 FreeSymbolicFactorization();
202 FreeNumericFactorization();
203}
204
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800205template <typename Scalar>
Austin Schuh70cc9552019-01-21 19:46:48 -0800206CompressedRowSparseMatrix::StorageType
207AppleAccelerateCholesky<Scalar>::StorageType() const {
Austin Schuh3de38b02024-06-25 18:25:10 -0700208 return CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR;
Austin Schuh70cc9552019-01-21 19:46:48 -0800209}
210
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800211template <typename Scalar>
212LinearSolverTerminationType AppleAccelerateCholesky<Scalar>::Factorize(
213 CompressedRowSparseMatrix* lhs, std::string* message) {
Austin Schuh70cc9552019-01-21 19:46:48 -0800214 CHECK_EQ(lhs->storage_type(), StorageType());
Austin Schuh3de38b02024-06-25 18:25:10 -0700215 if (lhs == nullptr) {
216 *message = "Failure: Input lhs is nullptr.";
217 return LinearSolverTerminationType::FATAL_ERROR;
Austin Schuh70cc9552019-01-21 19:46:48 -0800218 }
219 typename SparseTypesTrait<Scalar>::SparseMatrix as_lhs =
220 as_.CreateSparseMatrixTransposeView(lhs);
221
222 if (!symbolic_factor_) {
Austin Schuh3de38b02024-06-25 18:25:10 -0700223 symbolic_factor_ = std::make_unique<
224 typename SparseTypesTrait<Scalar>::SymbolicFactorization>(
225 as_.AnalyzeCholesky(ordering_type_, &as_lhs));
226
Austin Schuh70cc9552019-01-21 19:46:48 -0800227 if (symbolic_factor_->status != SparseStatusOK) {
228 *message = StringPrintf(
229 "Apple Accelerate Failure : Symbolic factorisation failed: %s",
230 SparseStatusToString(symbolic_factor_->status));
231 FreeSymbolicFactorization();
Austin Schuh3de38b02024-06-25 18:25:10 -0700232 return LinearSolverTerminationType::FATAL_ERROR;
Austin Schuh70cc9552019-01-21 19:46:48 -0800233 }
234 }
235
236 if (!numeric_factor_) {
Austin Schuh3de38b02024-06-25 18:25:10 -0700237 numeric_factor_ = std::make_unique<
238 typename SparseTypesTrait<Scalar>::NumericFactorization>(
239 as_.Cholesky(&as_lhs, symbolic_factor_.get()));
Austin Schuh70cc9552019-01-21 19:46:48 -0800240 } else {
241 // Recycle memory from previous numeric factorization.
242 as_.Cholesky(&as_lhs, numeric_factor_.get());
243 }
244 if (numeric_factor_->status != SparseStatusOK) {
245 *message = StringPrintf(
246 "Apple Accelerate Failure : Numeric factorisation failed: %s",
247 SparseStatusToString(numeric_factor_->status));
248 FreeNumericFactorization();
Austin Schuh3de38b02024-06-25 18:25:10 -0700249 return LinearSolverTerminationType::FAILURE;
Austin Schuh70cc9552019-01-21 19:46:48 -0800250 }
251
Austin Schuh3de38b02024-06-25 18:25:10 -0700252 return LinearSolverTerminationType::SUCCESS;
Austin Schuh70cc9552019-01-21 19:46:48 -0800253}
254
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800255template <typename Scalar>
256LinearSolverTerminationType AppleAccelerateCholesky<Scalar>::Solve(
257 const double* rhs, double* solution, std::string* message) {
Austin Schuh70cc9552019-01-21 19:46:48 -0800258 CHECK_EQ(numeric_factor_->status, SparseStatusOK)
259 << "Solve called without a call to Factorize first ("
260 << SparseStatusToString(numeric_factor_->status) << ").";
261 const int num_cols = numeric_factor_->symbolicFactorization.columnCount;
262
263 typename SparseTypesTrait<Scalar>::DenseVector as_rhs_and_solution;
264 as_rhs_and_solution.count = num_cols;
Austin Schuh3de38b02024-06-25 18:25:10 -0700265 if constexpr (std::is_same_v<Scalar, double>) {
266 as_rhs_and_solution.data = solution;
Austin Schuh70cc9552019-01-21 19:46:48 -0800267 std::copy_n(rhs, num_cols, solution);
268 } else {
269 scalar_rhs_and_solution_ =
270 ConstVectorRef(rhs, num_cols).template cast<Scalar>();
271 as_rhs_and_solution.data = scalar_rhs_and_solution_.data();
272 }
273 as_.Solve(numeric_factor_.get(), &as_rhs_and_solution);
274 if (!std::is_same<Scalar, double>::value) {
275 VectorRef(solution, num_cols) =
276 scalar_rhs_and_solution_.template cast<double>();
277 }
Austin Schuh3de38b02024-06-25 18:25:10 -0700278 return LinearSolverTerminationType::SUCCESS;
Austin Schuh70cc9552019-01-21 19:46:48 -0800279}
280
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800281template <typename Scalar>
Austin Schuh70cc9552019-01-21 19:46:48 -0800282void AppleAccelerateCholesky<Scalar>::FreeSymbolicFactorization() {
283 if (symbolic_factor_) {
284 SparseCleanup(*symbolic_factor_);
Austin Schuh3de38b02024-06-25 18:25:10 -0700285 symbolic_factor_ = nullptr;
Austin Schuh70cc9552019-01-21 19:46:48 -0800286 }
287}
288
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800289template <typename Scalar>
Austin Schuh70cc9552019-01-21 19:46:48 -0800290void AppleAccelerateCholesky<Scalar>::FreeNumericFactorization() {
291 if (numeric_factor_) {
292 SparseCleanup(*numeric_factor_);
Austin Schuh3de38b02024-06-25 18:25:10 -0700293 numeric_factor_ = nullptr;
Austin Schuh70cc9552019-01-21 19:46:48 -0800294 }
295}
296
297// Instantiate only for the specific template types required/supported s/t the
298// definition can be in the .cc file.
299template class AppleAccelerateCholesky<double>;
300template class AppleAccelerateCholesky<float>;
301
Austin Schuh1d1e6ea2020-12-23 21:56:30 -0800302} // namespace internal
303} // namespace ceres
Austin Schuh70cc9552019-01-21 19:46:48 -0800304
305#endif // CERES_NO_ACCELERATE_SPARSE