blob: 46692e6ce20a6dffcc4631cd97e779d8b97612a7 [file] [log] [blame]
Austin Schuh70cc9552019-01-21 19:46:48 -08001// Ceres Solver - A fast non-linear least squares minimizer
Austin Schuh3de38b02024-06-25 18:25:10 -07002// Copyright 2023 Google Inc. All rights reserved.
Austin Schuh70cc9552019-01-21 19:46:48 -08003// http://ceres-solver.org/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are met:
7//
8// * Redistributions of source code must retain the above copyright notice,
9// this list of conditions and the following disclaimer.
10// * Redistributions in binary form must reproduce the above copyright notice,
11// this list of conditions and the following disclaimer in the documentation
12// and/or other materials provided with the distribution.
13// * Neither the name of Google Inc. nor the names of its contributors may be
14// used to endorse or promote products derived from this software without
15// specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27// POSSIBILITY OF SUCH DAMAGE.
28//
29// Author: vitus@google.com (Michael Vitus)
30
31#ifndef CERES_INTERNAL_CONTEXT_IMPL_H_
32#define CERES_INTERNAL_CONTEXT_IMPL_H_
33
34// This include must come before any #ifndef check on Ceres compile options.
Austin Schuh1d1e6ea2020-12-23 21:56:30 -080035// clang-format off
Austin Schuh3de38b02024-06-25 18:25:10 -070036#include "ceres/internal/config.h"
37// clang-format on
38
39#include <string>
Austin Schuh70cc9552019-01-21 19:46:48 -080040
41#include "ceres/context.h"
Austin Schuh3de38b02024-06-25 18:25:10 -070042#include "ceres/internal/disable_warnings.h"
43#include "ceres/internal/export.h"
Austin Schuh70cc9552019-01-21 19:46:48 -080044
Austin Schuh3de38b02024-06-25 18:25:10 -070045#ifndef CERES_NO_CUDA
46#include "cublas_v2.h"
47#include "cuda_runtime.h"
48#include "cusolverDn.h"
49#include "cusparse.h"
50#endif // CERES_NO_CUDA
51
Austin Schuh70cc9552019-01-21 19:46:48 -080052#include "ceres/thread_pool.h"
Austin Schuh70cc9552019-01-21 19:46:48 -080053
Austin Schuh3de38b02024-06-25 18:25:10 -070054namespace ceres::internal {
Austin Schuh70cc9552019-01-21 19:46:48 -080055
Austin Schuh3de38b02024-06-25 18:25:10 -070056class CERES_NO_EXPORT ContextImpl final : public Context {
Austin Schuh70cc9552019-01-21 19:46:48 -080057 public:
Austin Schuh3de38b02024-06-25 18:25:10 -070058 ContextImpl();
59 ~ContextImpl() override;
Austin Schuh70cc9552019-01-21 19:46:48 -080060 ContextImpl(const ContextImpl&) = delete;
61 void operator=(const ContextImpl&) = delete;
62
Austin Schuh1d1e6ea2020-12-23 21:56:30 -080063 // When compiled with C++ threading support, resize the thread pool to have
Austin Schuh70cc9552019-01-21 19:46:48 -080064 // at min(num_thread, num_hardware_threads) where num_hardware_threads is
65 // defined by the hardware. Otherwise this call is a no-op.
66 void EnsureMinimumThreads(int num_threads);
67
Austin Schuh70cc9552019-01-21 19:46:48 -080068 ThreadPool thread_pool;
Austin Schuh3de38b02024-06-25 18:25:10 -070069
70#ifndef CERES_NO_CUDA
71 // Note on Ceres' use of CUDA Devices on multi-GPU systems:
72 // 1. On a multi-GPU system, if nothing special is done, the "default" CUDA
73 // device will be used, which is device 0.
74 // 2. If the user masks out GPUs using the CUDA_VISIBLE_DEVICES environment
75 // variable, Ceres will still use device 0 visible to the program, but
76 // device 0 will be the first GPU indicated in the environment variable.
77 // 3. If the user explicitly selects a GPU in the host process before calling
78 // Ceres, Ceres will use that GPU.
79
80 // Note on Ceres' use of CUDA Streams:
81 // Most of operations on the GPU are performed using a single stream. In
82 // those cases DefaultStream() should be used. This ensures that operations
83 // are stream-ordered, and might be concurrent with cpu processing with no
84 // additional efforts.
85 //
86 // a. Single-stream workloads
87 // - Only use default stream
88 // - Return control to the callee without synchronization whenever possible
89 // - Stream synchronization occurs only after GPU to CPU transfers, and is
90 // handled by CudaBuffer
91 //
92 // b. Multi-stream workloads
93 // Multi-stream workloads are more restricted in order to make it harder to
94 // get a race-condition.
95 // - Should always synchronize the default stream on entry
96 // - Should always synchronize all utilized streams on exit
97 // - Should not make any assumptions on one of streams_[] being default
98 //
99 // With those rules in place
100 // - All single-stream asynchronous workloads are serialized using default
101 // stream
102 // - Multiple-stream workloads always wait single-stream workloads to finish
103 // and leave no running computations on exit.
104 // This slightly penalizes multi-stream workloads, but makes it easier to
105 // avoid race conditions when multiple-stream workload depends on results of
106 // any preceeding gpu computations.
107
108 // Initializes cuBLAS, cuSOLVER, and cuSPARSE contexts, creates an
109 // asynchronous CUDA stream, and associates the stream with the contexts.
110 // Returns true iff initialization was successful, else it returns false and a
111 // human-readable error message is returned.
112 bool InitCuda(std::string* message);
113 void TearDown();
114 inline bool IsCudaInitialized() const { return is_cuda_initialized_; }
115 // Returns a human-readable string describing the capabilities of the current
116 // CUDA device. CudaConfigAsString can only be called after InitCuda has been
117 // called.
118 std::string CudaConfigAsString() const;
119 // Returns the number of bytes of available global memory on the current CUDA
120 // device. If it is called before InitCuda, it returns 0.
121 size_t GpuMemoryAvailable() const;
122
123 cusolverDnHandle_t cusolver_handle_ = nullptr;
124 cublasHandle_t cublas_handle_ = nullptr;
125
126 // Default stream.
127 // Kernel invocations and memory copies on this stream can be left without
128 // synchronization.
129 cudaStream_t DefaultStream() { return streams_[0]; }
130 static constexpr int kNumCudaStreams = 2;
131 cudaStream_t streams_[kNumCudaStreams] = {0};
132
133 cusparseHandle_t cusparse_handle_ = nullptr;
134 bool is_cuda_initialized_ = false;
135 int gpu_device_id_in_use_ = -1;
136 cudaDeviceProp gpu_device_properties_;
137 bool is_cuda_memory_pools_supported_ = false;
138 int cuda_version_major_ = 0;
139 int cuda_version_minor_ = 0;
140#endif // CERES_NO_CUDA
Austin Schuh70cc9552019-01-21 19:46:48 -0800141};
142
Austin Schuh3de38b02024-06-25 18:25:10 -0700143} // namespace ceres::internal
144
145#include "ceres/internal/reenable_warnings.h"
Austin Schuh70cc9552019-01-21 19:46:48 -0800146
147#endif // CERES_INTERNAL_CONTEXT_IMPL_H_