blob: 2b9d9cce2488d8cf3658c2e9f1ba16be39791395 [file] [log] [blame]
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
// used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: vitus@google.com (Michael Vitus)
30
31#include "ceres/context_impl.h"
32
Austin Schuh3de38b02024-06-25 18:25:10 -070033#include <string>
34
35#include "ceres/internal/config.h"
36#include "ceres/stringprintf.h"
37#include "ceres/wall_time.h"
38
39#ifndef CERES_NO_CUDA
40#include "cublas_v2.h"
41#include "cuda_runtime.h"
42#include "cusolverDn.h"
43#endif // CERES_NO_CUDA
44
45namespace ceres::internal {
46
47ContextImpl::ContextImpl() = default;
48
49#ifndef CERES_NO_CUDA
50void ContextImpl::TearDown() {
51 if (cusolver_handle_ != nullptr) {
52 cusolverDnDestroy(cusolver_handle_);
53 cusolver_handle_ = nullptr;
54 }
55 if (cublas_handle_ != nullptr) {
56 cublasDestroy(cublas_handle_);
57 cublas_handle_ = nullptr;
58 }
59 if (cusparse_handle_ != nullptr) {
60 cusparseDestroy(cusparse_handle_);
61 cusparse_handle_ = nullptr;
62 }
63 for (auto& s : streams_) {
64 if (s != nullptr) {
65 cudaStreamDestroy(s);
66 s = nullptr;
67 }
68 }
69 is_cuda_initialized_ = false;
70}
71
72std::string ContextImpl::CudaConfigAsString() const {
73 return ceres::internal::StringPrintf(
74 "======================= CUDA Device Properties ======================\n"
75 "Cuda version : %d.%d\n"
76 "Device ID : %d\n"
77 "Device name : %s\n"
78 "Total GPU memory : %6.f MiB\n"
79 "GPU memory available : %6.f MiB\n"
80 "Compute capability : %d.%d\n"
81 "Warp size : %d\n"
82 "Max threads per block : %d\n"
83 "Max threads per dim : %d %d %d\n"
84 "Max grid size : %d %d %d\n"
85 "Multiprocessor count : %d\n"
86 "cudaMallocAsync supported : %s\n"
87 "====================================================================",
88 cuda_version_major_,
89 cuda_version_minor_,
90 gpu_device_id_in_use_,
91 gpu_device_properties_.name,
92 gpu_device_properties_.totalGlobalMem / 1024.0 / 1024.0,
93 GpuMemoryAvailable() / 1024.0 / 1024.0,
94 gpu_device_properties_.major,
95 gpu_device_properties_.minor,
96 gpu_device_properties_.warpSize,
97 gpu_device_properties_.maxThreadsPerBlock,
98 gpu_device_properties_.maxThreadsDim[0],
99 gpu_device_properties_.maxThreadsDim[1],
100 gpu_device_properties_.maxThreadsDim[2],
101 gpu_device_properties_.maxGridSize[0],
102 gpu_device_properties_.maxGridSize[1],
103 gpu_device_properties_.maxGridSize[2],
104 gpu_device_properties_.multiProcessorCount,
105 // In CUDA 12.0.0+ cudaDeviceProp has field memoryPoolsSupported, but it
106 // is not available in older versions
107 is_cuda_memory_pools_supported_ ? "Yes" : "No");
108}
109
110size_t ContextImpl::GpuMemoryAvailable() const {
111 size_t free, total;
112 cudaMemGetInfo(&free, &total);
113 return free;
114}
115
116bool ContextImpl::InitCuda(std::string* message) {
117 if (is_cuda_initialized_) {
118 return true;
119 }
120 CHECK_EQ(cudaGetDevice(&gpu_device_id_in_use_), cudaSuccess);
121 int cuda_version;
122 CHECK_EQ(cudaRuntimeGetVersion(&cuda_version), cudaSuccess);
123 cuda_version_major_ = cuda_version / 1000;
124 cuda_version_minor_ = (cuda_version % 1000) / 10;
125 CHECK_EQ(
126 cudaGetDeviceProperties(&gpu_device_properties_, gpu_device_id_in_use_),
127 cudaSuccess);
128#if CUDART_VERSION >= 11020
129 int is_cuda_memory_pools_supported;
130 CHECK_EQ(cudaDeviceGetAttribute(&is_cuda_memory_pools_supported,
131 cudaDevAttrMemoryPoolsSupported,
132 gpu_device_id_in_use_),
133 cudaSuccess);
134 is_cuda_memory_pools_supported_ = is_cuda_memory_pools_supported == 1;
135#endif
136 VLOG(3) << "\n" << CudaConfigAsString();
137 EventLogger event_logger("InitCuda");
138 if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
139 *message =
140 "CUDA initialization failed because cuBLAS::cublasCreate failed.";
141 cublas_handle_ = nullptr;
142 return false;
143 }
144 event_logger.AddEvent("cublasCreate");
145 if (cusolverDnCreate(&cusolver_handle_) != CUSOLVER_STATUS_SUCCESS) {
146 *message =
147 "CUDA initialization failed because cuSolverDN::cusolverDnCreate "
148 "failed.";
149 TearDown();
150 return false;
151 }
152 event_logger.AddEvent("cusolverDnCreate");
153 if (cusparseCreate(&cusparse_handle_) != CUSPARSE_STATUS_SUCCESS) {
154 *message =
155 "CUDA initialization failed because cuSPARSE::cusparseCreate failed.";
156 TearDown();
157 return false;
158 }
159 event_logger.AddEvent("cusparseCreate");
160 for (auto& s : streams_) {
161 if (cudaStreamCreateWithFlags(&s, cudaStreamNonBlocking) != cudaSuccess) {
162 *message =
163 "CUDA initialization failed because CUDA::cudaStreamCreateWithFlags "
164 "failed.";
165 TearDown();
166 return false;
167 }
168 }
169 event_logger.AddEvent("cudaStreamCreateWithFlags");
170 if (cusolverDnSetStream(cusolver_handle_, DefaultStream()) !=
171 CUSOLVER_STATUS_SUCCESS ||
172 cublasSetStream(cublas_handle_, DefaultStream()) !=
173 CUBLAS_STATUS_SUCCESS ||
174 cusparseSetStream(cusparse_handle_, DefaultStream()) !=
175 CUSPARSE_STATUS_SUCCESS) {
176 *message = "CUDA initialization failed because SetStream failed.";
177 TearDown();
178 return false;
179 }
180 event_logger.AddEvent("SetStream");
181 is_cuda_initialized_ = true;
182 return true;
183}
184#endif // CERES_NO_CUDA
185
186ContextImpl::~ContextImpl() {
187#ifndef CERES_NO_CUDA
188 TearDown();
189#endif // CERES_NO_CUDA
190}
Austin Schuh70cc9552019-01-21 19:46:48 -0800191
192void ContextImpl::EnsureMinimumThreads(int num_threads) {
Austin Schuh70cc9552019-01-21 19:46:48 -0800193 thread_pool.Resize(num_threads);
Austin Schuh70cc9552019-01-21 19:46:48 -0800194}
Austin Schuh3de38b02024-06-25 18:25:10 -0700195
196} // namespace ceres::internal