// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: alexs.mac@gmail.com (Alex Stewart)

// This include must come before any #ifndef check on Ceres compile options.
#include "ceres/internal/port.h"

#ifndef CERES_NO_ACCELERATE_SPARSE

#include <algorithm>
#include <string>
#include <vector>

#include "ceres/accelerate_sparse.h"
#include "ceres/compressed_col_sparse_matrix_utils.h"
#include "ceres/compressed_row_sparse_matrix.h"
#include "ceres/triplet_sparse_matrix.h"
#include "glog/logging.h"

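// Expands to a case label that returns the stringified name of the given
// status value, e.g. CASESTR(SparseStatusOK) becomes
// `case SparseStatusOK: return "SparseStatusOK"`.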
#define CASESTR(x) \
  case x:          \
    return #x

namespace ceres {
namespace internal {

namespace {
const char* SparseStatusToString(SparseStatus_t status) {
  switch (status) {
    CASESTR(SparseStatusOK);
    CASESTR(SparseFactorizationFailed);
    CASESTR(SparseMatrixIsSingular);
    CASESTR(SparseInternalError);
    CASESTR(SparseParameterError);
    CASESTR(SparseStatusReleased);
    default:
      return "UNKNOWN";
  }
}
}  // namespace

// Resizes workspace as required to contain at least required_size bytes
// aligned to kAccelerateRequiredAlignment and returns a pointer to the
// aligned start.
void* ResizeForAccelerateAlignment(const size_t required_size,
                                   std::vector<uint8_t>* workspace) {
  // As per the Accelerate documentation, all workspace memory passed to the
  // sparse solver functions must be 16-byte aligned.
  constexpr int kAccelerateRequiredAlignment = 16;
  // Although malloc() on macOS should always be 16-byte aligned, it is unclear
  // if this holds for new(), or on other Apple OSs (iOS, watchOS, etc.). As
  // such we assume it is not and use std::align() to create a (potentially
  // offset) 16-byte aligned sub-buffer of the specified size within workspace.
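  // Illustrative example (an assumed runtime address, not taken from the
  // Accelerate docs): if workspace->data() starts 8 bytes past a 16-byte
  // boundary, std::align() below returns a pointer 8 bytes into the buffer;
  // over-allocating by kAccelerateRequiredAlignment bytes guarantees that an
  // aligned sub-buffer of required_size bytes fits for any initial offset.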
  workspace->resize(required_size + kAccelerateRequiredAlignment);
  size_t size_from_aligned_start = workspace->size();
  void* aligned_solve_workspace_start =
      reinterpret_cast<void*>(workspace->data());
  aligned_solve_workspace_start = std::align(kAccelerateRequiredAlignment,
                                             required_size,
                                             aligned_solve_workspace_start,
                                             size_from_aligned_start);
  CHECK(aligned_solve_workspace_start != nullptr)
      << "required_size: " << required_size
      << ", workspace size: " << workspace->size();
  return aligned_solve_workspace_start;
}

template <typename Scalar>
void AccelerateSparse<Scalar>::Solve(NumericFactorization* numeric_factor,
                                     DenseVector* rhs_and_solution) {
  // From SparseSolve() documentation in Solve.h
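  // (a single right-hand side is solved in place here, so the workspace only
  // needs the static portion plus one per-RHS block).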
  const int required_size = numeric_factor->solveWorkspaceRequiredStatic +
                            numeric_factor->solveWorkspaceRequiredPerRHS;
  SparseSolve(*numeric_factor,
              *rhs_and_solution,
              ResizeForAccelerateAlignment(required_size, &solve_workspace_));
}

template <typename Scalar>
typename AccelerateSparse<Scalar>::ASSparseMatrix
AccelerateSparse<Scalar>::CreateSparseMatrixTransposeView(
    CompressedRowSparseMatrix* A) {
  // Accelerate uses CSC as its sparse storage format whereas Ceres uses CSR.
  // As this method returns the transpose view, we can reuse A's CSR arrays
  // directly as the CSC arrays of A^T by flipping rows/cols.
  //
  // Accelerate's columnStarts is a long*, not an int*. These types might be
  // different sizes (e.g. ARM on iOS), so always make a copy.
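  // Illustrative example (not in the original source): a 3x3 lower-triangular
  // CSR matrix A with
  //   rows = [0, 1, 2, 4], cols = [0, 1, 0, 2]
  // is reinterpreted as its transpose A^T (upper-triangular) in CSC with
  //   columnStarts = [0, 1, 2, 4], rowIndices = [0, 1, 0, 2],
  // i.e. A's row starts become A^T's column starts and A's column indices
  // become A^T's row indices, with the values array left untouched.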
  column_starts_.resize(A->num_rows() + 1);  // +1 for final column length.
  std::copy_n(A->rows(), column_starts_.size(), &column_starts_[0]);

  ASSparseMatrix At;
  At.structure.rowCount = A->num_cols();
  At.structure.columnCount = A->num_rows();
  At.structure.columnStarts = &column_starts_[0];
  At.structure.rowIndices = A->mutable_cols();
  At.structure.attributes.transpose = false;
  At.structure.attributes.triangle = SparseUpperTriangle;
  At.structure.attributes.kind = SparseSymmetric;
  At.structure.attributes._reserved = 0;
  At.structure.attributes._allocatedBySparse = 0;
  At.structure.blockSize = 1;
  if (std::is_same<Scalar, double>::value) {
    At.data = reinterpret_cast<Scalar*>(A->mutable_values());
  } else {
    values_ =
        ConstVectorRef(A->values(), A->num_nonzeros()).template cast<Scalar>();
    At.data = values_.data();
  }
  return At;
}

template <typename Scalar>
typename AccelerateSparse<Scalar>::SymbolicFactorization
AccelerateSparse<Scalar>::AnalyzeCholesky(ASSparseMatrix* A) {
  return SparseFactor(SparseFactorizationCholesky, A->structure);
}

template <typename Scalar>
typename AccelerateSparse<Scalar>::NumericFactorization
AccelerateSparse<Scalar>::Cholesky(ASSparseMatrix* A,
                                   SymbolicFactorization* symbolic_factor) {
  return SparseFactor(*symbolic_factor, *A);
}

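// Numerically (re)factorizes A into an existing NumericFactorization via
// SparseRefactor(), reusing both the symbolic analysis and the previously
// allocated numeric storage.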
template <typename Scalar>
void AccelerateSparse<Scalar>::Cholesky(ASSparseMatrix* A,
                                        NumericFactorization* numeric_factor) {
  // From SparseRefactor() documentation in Solve.h
  const int required_size =
      std::is_same<Scalar, double>::value
          ? numeric_factor->symbolicFactorization.workspaceSize_Double
          : numeric_factor->symbolicFactorization.workspaceSize_Float;
  return SparseRefactor(
      *A,
      numeric_factor,
      ResizeForAccelerateAlignment(required_size, &factorization_workspace_));
}

// Instantiate only for the specific template types required/supported so that
// the definition can be in the .cc file.
template class AccelerateSparse<double>;
template class AccelerateSparse<float>;

template <typename Scalar>
std::unique_ptr<SparseCholesky> AppleAccelerateCholesky<Scalar>::Create(
    OrderingType ordering_type) {
  return std::unique_ptr<SparseCholesky>(
      new AppleAccelerateCholesky<Scalar>(ordering_type));
}

template <typename Scalar>
AppleAccelerateCholesky<Scalar>::AppleAccelerateCholesky(
    const OrderingType ordering_type)
    : ordering_type_(ordering_type) {}

template <typename Scalar>
AppleAccelerateCholesky<Scalar>::~AppleAccelerateCholesky() {
  FreeSymbolicFactorization();
  FreeNumericFactorization();
}

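// Ceres supplies the lower-triangular half of the symmetric system in CSR;
// viewed as its transpose in CSC (see CreateSparseMatrixTransposeView above)
// this is the upper triangle that the Accelerate factorization consumes.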
template <typename Scalar>
CompressedRowSparseMatrix::StorageType
AppleAccelerateCholesky<Scalar>::StorageType() const {
  return CompressedRowSparseMatrix::LOWER_TRIANGULAR;
}

template <typename Scalar>
LinearSolverTerminationType AppleAccelerateCholesky<Scalar>::Factorize(
    CompressedRowSparseMatrix* lhs, std::string* message) {
  if (lhs == NULL) {
    *message = "Failure: Input lhs is NULL.";
    return LINEAR_SOLVER_FATAL_ERROR;
  }
  CHECK_EQ(lhs->storage_type(), StorageType());
  typename SparseTypesTrait<Scalar>::SparseMatrix as_lhs =
      as_.CreateSparseMatrixTransposeView(lhs);

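  // The symbolic factorization depends only on the sparsity pattern of lhs,
  // which is assumed not to change between calls, so it is computed on the
  // first call and reused thereafter.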
  if (!symbolic_factor_) {
    symbolic_factor_.reset(
        new typename SparseTypesTrait<Scalar>::SymbolicFactorization(
            as_.AnalyzeCholesky(&as_lhs)));
    if (symbolic_factor_->status != SparseStatusOK) {
      *message = StringPrintf(
          "Apple Accelerate Failure : Symbolic factorisation failed: %s",
          SparseStatusToString(symbolic_factor_->status));
      FreeSymbolicFactorization();
      return LINEAR_SOLVER_FATAL_ERROR;
    }
  }

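  // The numeric factorization is allocated on the first call; subsequent
  // calls refactorize in place (see the refactoring Cholesky() overload
  // above) to avoid reallocating its storage.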
  if (!numeric_factor_) {
    numeric_factor_.reset(
        new typename SparseTypesTrait<Scalar>::NumericFactorization(
            as_.Cholesky(&as_lhs, symbolic_factor_.get())));
  } else {
    // Recycle memory from previous numeric factorization.
    as_.Cholesky(&as_lhs, numeric_factor_.get());
  }
  if (numeric_factor_->status != SparseStatusOK) {
    *message = StringPrintf(
        "Apple Accelerate Failure : Numeric factorisation failed: %s",
        SparseStatusToString(numeric_factor_->status));
    FreeNumericFactorization();
    return LINEAR_SOLVER_FAILURE;
  }

  return LINEAR_SOLVER_SUCCESS;
}

template <typename Scalar>
LinearSolverTerminationType AppleAccelerateCholesky<Scalar>::Solve(
    const double* rhs, double* solution, std::string* message) {
  CHECK_EQ(numeric_factor_->status, SparseStatusOK)
      << "Solve called without a call to Factorize first ("
      << SparseStatusToString(numeric_factor_->status) << ").";
  const int num_cols = numeric_factor_->symbolicFactorization.columnCount;

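  // SparseSolve() overwrites the supplied dense vector in place: for
  // Scalar == double the rhs is copied into solution and solved there; for
  // Scalar == float it is cast into a scratch vector and cast back afterwards.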
  typename SparseTypesTrait<Scalar>::DenseVector as_rhs_and_solution;
  as_rhs_and_solution.count = num_cols;
  if (std::is_same<Scalar, double>::value) {
    as_rhs_and_solution.data = reinterpret_cast<Scalar*>(solution);
    std::copy_n(rhs, num_cols, solution);
  } else {
    scalar_rhs_and_solution_ =
        ConstVectorRef(rhs, num_cols).template cast<Scalar>();
    as_rhs_and_solution.data = scalar_rhs_and_solution_.data();
  }
  as_.Solve(numeric_factor_.get(), &as_rhs_and_solution);
  if (!std::is_same<Scalar, double>::value) {
    VectorRef(solution, num_cols) =
        scalar_rhs_and_solution_.template cast<double>();
  }
  return LINEAR_SOLVER_SUCCESS;
}

template <typename Scalar>
void AppleAccelerateCholesky<Scalar>::FreeSymbolicFactorization() {
  if (symbolic_factor_) {
    SparseCleanup(*symbolic_factor_);
    symbolic_factor_.reset();
  }
}

template <typename Scalar>
void AppleAccelerateCholesky<Scalar>::FreeNumericFactorization() {
  if (numeric_factor_) {
    SparseCleanup(*numeric_factor_);
    numeric_factor_.reset();
  }
}

// Instantiate only for the specific template types required/supported so that
// the definition can be in the .cc file.
template class AppleAccelerateCholesky<double>;
template class AppleAccelerateCholesky<float>;

}  // namespace internal
}  // namespace ceres

#endif  // CERES_NO_ACCELERATE_SPARSE