Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame^] | 1 | // -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*- |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 2 | // Copyright (c) 2007, Google Inc. |
| 3 | // All rights reserved. |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame^] | 4 | // |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 5 | // Redistribution and use in source and binary forms, with or without |
| 6 | // modification, are permitted provided that the following conditions are |
| 7 | // met: |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame^] | 8 | // |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 9 | // * Redistributions of source code must retain the above copyright |
| 10 | // notice, this list of conditions and the following disclaimer. |
| 11 | // * Redistributions in binary form must reproduce the above |
| 12 | // copyright notice, this list of conditions and the following disclaimer |
| 13 | // in the documentation and/or other materials provided with the |
| 14 | // distribution. |
| 15 | // * Neither the name of Google Inc. nor the names of its |
| 16 | // contributors may be used to endorse or promote products derived from |
| 17 | // this software without specific prior written permission. |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame^] | 18 | // |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 19 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 20 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 21 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 22 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 23 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 25 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 26 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 27 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 28 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 29 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 30 | // |
| 31 | // --- |
| 32 | // Author: Craig Silverstein |
| 33 | // |
| 34 | // The main purpose of this file is to patch the libc allocation |
| 35 | // routines (malloc and friends, but also _msize and other |
| 36 | // windows-specific libc-style routines). However, we also patch |
| 37 | // windows routines to do accounting. We do better at the former than |
| 38 | // the latter. Here are some comments from Paul Pluzhnikov about what |
| 39 | // it might take to do a really good job patching windows routines to |
| 40 | // keep track of memory usage: |
| 41 | // |
| 42 | // "You should intercept at least the following: |
| 43 | // HeapCreate HeapDestroy HeapAlloc HeapReAlloc HeapFree |
| 44 | // RtlCreateHeap RtlDestroyHeap RtlAllocateHeap RtlFreeHeap |
| 45 | // malloc calloc realloc free |
| 46 | // malloc_dbg calloc_dbg realloc_dbg free_dbg |
| 47 | // Some of these call the other ones (but not always), sometimes |
| 48 | // recursively (i.e. HeapCreate may call HeapAlloc on a different |
| 49 | // heap, IIRC)." |
| 50 | // |
| 51 | // Since Paul didn't mention VirtualAllocEx, he may not have even been |
| 52 | // considering all the mmap-like functions that windows has (or he may |
| 53 | // just be ignoring it because he's seen we already patch it). Of the |
| 54 | // above, we do not patch the *_dbg functions, and of the windows |
| 55 | // functions, we only patch HeapAlloc and HeapFree. |
| 56 | // |
| 57 | // The *_dbg functions come into play with /MDd, /MTd, and /MLd, |
| 58 | // probably. It may be ok to just turn off tcmalloc in those cases -- |
| 59 | // if the user wants the windows debug malloc, they probably don't |
| 60 | // want tcmalloc! We should also test with all of /MD, /MT, and /ML, |
| 61 | // which we're not currently doing. |
| 62 | |
| 63 | // TODO(csilvers): try to do better here? Paul does conclude: |
| 64 | // "Keeping track of all of this was a nightmare." |
| 65 | |
| 66 | #ifndef _WIN32 |
| 67 | # error You should only be including windows/patch_functions.cc in a windows environment! |
| 68 | #endif |
| 69 | |
| 70 | #include <config.h> |
| 71 | |
| 72 | #ifdef WIN32_OVERRIDE_ALLOCATORS |
| 73 | #error This file is intended for patching allocators - use override_functions.cc instead. |
| 74 | #endif |
| 75 | |
| 76 | // We use psapi. Non-MSVC systems will have to link this in themselves. |
| 77 | #ifdef _MSC_VER |
| 78 | #pragma comment(lib, "Psapi.lib") |
| 79 | #endif |
| 80 | |
| 81 | // Make sure we always use the 'old' names of the psapi functions. |
| 82 | #ifndef PSAPI_VERSION |
| 83 | #define PSAPI_VERSION 1 |
| 84 | #endif |
| 85 | |
| 86 | #include <windows.h> |
| 87 | #include <stdio.h> |
| 88 | #include <malloc.h> // for _msize and _expand |
| 89 | #include <psapi.h> // for EnumProcessModules, GetModuleInformation, etc. |
| 90 | #include <set> |
| 91 | #include <map> |
| 92 | #include <vector> |
| 93 | #include <base/logging.h> |
| 94 | #include "base/spinlock.h" |
| 95 | #include "gperftools/malloc_hook.h" |
| 96 | #include "malloc_hook-inl.h" |
| 97 | #include "preamble_patcher.h" |
| 98 | |
// The maximum number of modules we allow to be in one executable.
// NOTE(review): presumably this bounds the module list gathered when
// enumerating loaded modules later in this file -- confirm at the use site.
const int kMaxModules = 8182;
| 101 | |
// These are hard-coded, unfortunately. :-(  They are also probably
// compiler specific.  See get_mangled_names.cc, in this directory,
// for instructions on how to update these names for your compiler.
//
// Each constant is the decorated (mangled) export name of one global
// operator new/delete overload.  The _WIN64 branch holds the 64-bit
// decorations (note the "PEAX" / "_K" fragments); the #else branch
// holds the 32-bit decorations ("PAX" / "I").
#ifdef _WIN64
const char kMangledNew[] = "??2@YAPEAX_K@Z";
const char kMangledNewArray[] = "??_U@YAPEAX_K@Z";
const char kMangledDelete[] = "??3@YAXPEAX@Z";
const char kMangledDeleteArray[] = "??_V@YAXPEAX@Z";
const char kMangledNewNothrow[] = "??2@YAPEAX_KAEBUnothrow_t@std@@@Z";
const char kMangledNewArrayNothrow[] = "??_U@YAPEAX_KAEBUnothrow_t@std@@@Z";
const char kMangledDeleteNothrow[] = "??3@YAXPEAXAEBUnothrow_t@std@@@Z";
const char kMangledDeleteArrayNothrow[] = "??_V@YAXPEAXAEBUnothrow_t@std@@@Z";
#else
const char kMangledNew[] = "??2@YAPAXI@Z";
const char kMangledNewArray[] = "??_U@YAPAXI@Z";
const char kMangledDelete[] = "??3@YAXPAX@Z";
const char kMangledDeleteArray[] = "??_V@YAXPAX@Z";
const char kMangledNewNothrow[] = "??2@YAPAXIABUnothrow_t@std@@@Z";
const char kMangledNewArrayNothrow[] = "??_U@YAPAXIABUnothrow_t@std@@@Z";
const char kMangledDeleteNothrow[] = "??3@YAXPAXABUnothrow_t@std@@@Z";
const char kMangledDeleteArrayNothrow[] = "??_V@YAXPAXABUnothrow_t@std@@@Z";
#endif
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 124 | |
// This is an unused but exported symbol that we can use to tell the
// MSVC linker to bring in libtcmalloc, via the /INCLUDE linker flag.
// Without this, the linker will likely decide that libtcmalloc.dll
// doesn't add anything to the executable (since it does all its work
// through patching, which the linker can't see), and ignore it
// entirely.  (The name 'tcmalloc' is already reserved for a
// namespace.  I'd rather export a variable named "_tcmalloc", but I
// couldn't figure out how to get that to work.  This function exports
// the symbol "__tcmalloc".)
//
// The body is intentionally empty: only the exported symbol matters.
extern "C" PERFTOOLS_DLL_DECL void _tcmalloc();
void _tcmalloc() { }

// This is the version needed for windows x64, which has a different
// decoration scheme which doesn't auto-add a leading underscore.
// Like _tcmalloc() above, it does nothing when called.
extern "C" PERFTOOLS_DLL_DECL void __tcmalloc();
void __tcmalloc() { }
| 141 | |
| 142 | namespace { // most everything here is in an unnamed namespace |
| 143 | |
// A type-erased function pointer.  Every function we patch (and every
// replacement we patch in) is stored as one of these, cast from its
// real signature, so they can all live in plain arrays.
typedef void (*GenericFnPtr)();

// The class that does the actual preamble patching/unpatching.
using sidestep::PreamblePatcher;

struct ModuleEntryCopy;   // defined below
| 149 | |
| 150 | // These functions are how we override the memory allocation |
| 151 | // functions, just like tcmalloc.cc and malloc_hook.cc do. |
| 152 | |
| 153 | // This is information about the routines we're patching, for a given |
| 154 | // module that implements libc memory routines. A single executable |
| 155 | // can have several libc implementations running about (in different |
| 156 | // .dll's), and we need to patch/unpatch them all. This defines |
| 157 | // everything except the new functions we're patching in, which |
| 158 | // are defined in LibcFunctions, below. |
| 159 | class LibcInfo { |
| 160 | public: |
| 161 | LibcInfo() { |
| 162 | memset(this, 0, sizeof(*this)); // easiest way to initialize the array |
| 163 | } |
| 164 | |
| 165 | bool patched() const { return is_valid(); } |
| 166 | void set_is_valid(bool b) { is_valid_ = b; } |
| 167 | // According to http://msdn.microsoft.com/en-us/library/ms684229(VS.85).aspx: |
| 168 | // "The load address of a module (lpBaseOfDll) is the same as the HMODULE |
| 169 | // value." |
| 170 | HMODULE hmodule() const { |
| 171 | return reinterpret_cast<HMODULE>(const_cast<void*>(module_base_address_)); |
| 172 | } |
| 173 | |
| 174 | // Populates all the windows_fn_[] vars based on our module info. |
| 175 | // Returns false if windows_fn_ is all NULL's, because there's |
| 176 | // nothing to patch. Also populates the rest of the module_entry |
| 177 | // info, such as the module's name. |
| 178 | bool PopulateWindowsFn(const ModuleEntryCopy& module_entry); |
| 179 | |
| 180 | protected: |
| 181 | void CopyFrom(const LibcInfo& that) { |
| 182 | if (this == &that) |
| 183 | return; |
| 184 | this->is_valid_ = that.is_valid_; |
| 185 | memcpy(this->windows_fn_, that.windows_fn_, sizeof(windows_fn_)); |
| 186 | this->module_base_address_ = that.module_base_address_; |
| 187 | this->module_base_size_ = that.module_base_size_; |
| 188 | } |
| 189 | |
| 190 | enum { |
| 191 | kMalloc, kFree, kRealloc, kCalloc, |
| 192 | kNew, kNewArray, kDelete, kDeleteArray, |
| 193 | kNewNothrow, kNewArrayNothrow, kDeleteNothrow, kDeleteArrayNothrow, |
| 194 | // These are windows-only functions from malloc.h |
| 195 | k_Msize, k_Expand, |
| 196 | // A MS CRT "internal" function, implemented using _calloc_impl |
| 197 | k_CallocCrt, |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame^] | 198 | // Underlying deallocation functions called by CRT internal functions or operator delete |
| 199 | kFreeBase, kFreeDbg, |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 200 | kNumFunctions |
| 201 | }; |
| 202 | |
| 203 | // I'd like to put these together in a struct (perhaps in the |
| 204 | // subclass, so we can put in perftools_fn_ as well), but vc8 seems |
| 205 | // to have a bug where it doesn't initialize the struct properly if |
| 206 | // we try to take the address of a function that's not yet loaded |
| 207 | // from a dll, as is the common case for static_fn_. So we need |
| 208 | // each to be in its own array. :-( |
| 209 | static const char* const function_name_[kNumFunctions]; |
| 210 | |
| 211 | // This function is only used when statically linking the binary. |
| 212 | // In that case, loading malloc/etc from the dll (via |
| 213 | // PatchOneModule) won't work, since there are no dlls. Instead, |
| 214 | // you just want to be taking the address of malloc/etc directly. |
| 215 | // In the common, non-static-link case, these pointers will all be |
| 216 | // NULL, since this initializer runs before msvcrt.dll is loaded. |
| 217 | static const GenericFnPtr static_fn_[kNumFunctions]; |
| 218 | |
| 219 | // This is the address of the function we are going to patch |
| 220 | // (malloc, etc). Other info about the function is in the |
| 221 | // patch-specific subclasses, below. |
| 222 | GenericFnPtr windows_fn_[kNumFunctions]; |
| 223 | |
| 224 | // This is set to true when this structure is initialized (because |
| 225 | // we're patching a new library) and set to false when it's |
| 226 | // uninitialized (because we've freed that library). |
| 227 | bool is_valid_; |
| 228 | |
| 229 | const void *module_base_address_; |
| 230 | size_t module_base_size_; |
| 231 | |
| 232 | public: |
| 233 | // These shouldn't have to be public, since only subclasses of |
| 234 | // LibcInfo need it, but they do. Maybe something to do with |
| 235 | // templates. Shrug. I hide them down here so users won't see |
| 236 | // them. :-) (OK, I also need to define ctrgProcAddress late.) |
| 237 | bool is_valid() const { return is_valid_; } |
| 238 | GenericFnPtr windows_fn(int ifunction) const { |
| 239 | return windows_fn_[ifunction]; |
| 240 | } |
| 241 | // These three are needed by ModuleEntryCopy. |
| 242 | static const int ctrgProcAddress = kNumFunctions; |
| 243 | static GenericFnPtr static_fn(int ifunction) { |
| 244 | return static_fn_[ifunction]; |
| 245 | } |
| 246 | static const char* const function_name(int ifunction) { |
| 247 | return function_name_[ifunction]; |
| 248 | } |
| 249 | }; |
| 250 | |
| 251 | // Template trickiness: logically, a LibcInfo would include |
| 252 | // Windows_malloc_, origstub_malloc_, and Perftools_malloc_: for a |
| 253 | // given module, these three go together. And in fact, |
| 254 | // Perftools_malloc_ may need to call origstub_malloc_, which means we |
| 255 | // either need to change Perftools_malloc_ to take origstub_malloc_ as |
| 256 | // an argument -- unfortunately impossible since it needs to keep the |
| 257 | // same API as normal malloc -- or we need to write a different |
| 258 | // version of Perftools_malloc_ for each LibcInfo instance we create. |
| 259 | // We choose the second route, and use templates to implement it (we |
| 260 | // could have also used macros). So to get multiple versions |
| 261 | // of the struct, we say "struct<1> var1; struct<2> var2;". The price |
| 262 | // we pay is some code duplication, and more annoying, each instance |
| 263 | // of this var is a separate type. |
// One instantiation per patch slot: the int template argument exists
// only to give each slot its own type, and therefore its own set of
// static members (origstub_fn_ and the Perftools_* replacements).
template<int> class LibcInfoWithPatchFunctions : public LibcInfo {
 public:
  // me_info should have had PopulateWindowsFn() called on it, so the
  // module_* vars and windows_fn_ are set up.
  bool Patch(const LibcInfo& me_info);
  // Undoes Patch() for every function that was resolved, and marks
  // this slot as no longer valid.
  void Unpatch();

 private:
  // This holds the original function contents after we patch the function.
  // This has to be defined static in the subclass, because the perftools_fns
  // reference origstub_fn_.
  static GenericFnPtr origstub_fn_[kNumFunctions];

  // This is the function we want to patch in
  static const GenericFnPtr perftools_fn_[kNumFunctions];

  // The replacement routines themselves.  Each one mirrors the
  // signature of the libc / C++ runtime function it replaces.
  static void* Perftools_malloc(size_t size) __THROW;
  static void Perftools_free(void* ptr) __THROW;
  static void Perftools_free_base(void* ptr) __THROW;
  static void Perftools_free_dbg(void* ptr, int block_use) __THROW;
  static void* Perftools_realloc(void* ptr, size_t size) __THROW;
  static void* Perftools_calloc(size_t nmemb, size_t size) __THROW;
  static void* Perftools_new(size_t size);
  static void* Perftools_newarray(size_t size);
  static void Perftools_delete(void *ptr);
  static void Perftools_deletearray(void *ptr);
  static void* Perftools_new_nothrow(size_t size,
                                     const std::nothrow_t&) __THROW;
  static void* Perftools_newarray_nothrow(size_t size,
                                          const std::nothrow_t&) __THROW;
  static void Perftools_delete_nothrow(void *ptr,
                                       const std::nothrow_t&) __THROW;
  static void Perftools_deletearray_nothrow(void *ptr,
                                            const std::nothrow_t&) __THROW;
  static size_t Perftools__msize(void *ptr) __THROW;
  static void* Perftools__expand(void *ptr, size_t size) __THROW;
  // malloc.h also defines these functions:
  //   _aligned_malloc, _aligned_free,
  //   _recalloc, _aligned_offset_malloc, _aligned_realloc, _aligned_recalloc
  //   _aligned_offset_realloc, _aligned_offset_recalloc, _malloca, _freea
  // But they seem pretty obscure, and I'm fine not overriding them for now.
  // It may be they all call into malloc/free anyway.
};
| 307 | |
| 308 | // This is a subset of MODDULEENTRY32, that we need for patching. |
| 309 | struct ModuleEntryCopy { |
| 310 | LPVOID modBaseAddr; // the same as hmodule |
| 311 | DWORD modBaseSize; |
| 312 | // This is not part of MODDULEENTRY32, but is needed to avoid making |
| 313 | // windows syscalls while we're holding patch_all_modules_lock (see |
| 314 | // lock-inversion comments at patch_all_modules_lock definition, below). |
| 315 | GenericFnPtr rgProcAddresses[LibcInfo::ctrgProcAddress]; |
| 316 | |
| 317 | ModuleEntryCopy() { |
| 318 | modBaseAddr = NULL; |
| 319 | modBaseSize = 0; |
| 320 | for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++) |
| 321 | rgProcAddresses[i] = LibcInfo::static_fn(i); |
| 322 | } |
| 323 | ModuleEntryCopy(const MODULEINFO& mi) { |
| 324 | this->modBaseAddr = mi.lpBaseOfDll; |
| 325 | this->modBaseSize = mi.SizeOfImage; |
| 326 | LPVOID modEndAddr = (char*)mi.lpBaseOfDll + mi.SizeOfImage; |
| 327 | for (int i = 0; i < sizeof(rgProcAddresses)/sizeof(*rgProcAddresses); i++) { |
| 328 | FARPROC target = ::GetProcAddress( |
| 329 | reinterpret_cast<const HMODULE>(mi.lpBaseOfDll), |
| 330 | LibcInfo::function_name(i)); |
| 331 | // Sometimes a DLL forwards a function to a function in another |
| 332 | // DLL. We don't want to patch those forwarded functions -- |
| 333 | // they'll get patched when the other DLL is processed. |
| 334 | if (target >= modBaseAddr && target < modEndAddr) |
| 335 | rgProcAddresses[i] = (GenericFnPtr)target; |
| 336 | else |
| 337 | rgProcAddresses[i] = (GenericFnPtr)NULL; |
| 338 | } |
| 339 | } |
| 340 | }; |
| 341 | |
// This class is easier because there's only one of them.
// It covers the kernel32 routines we patch (as opposed to the libc
// routines handled by LibcInfo).
class WindowsInfo {
 public:
  // Looks up and patches every function listed in function_info_.
  void Patch();
  // Reverts every patch applied by Patch().
  void Unpatch();

 private:
  // TODO(csilvers): should we be patching GlobalAlloc/LocalAlloc instead,
  //                 for pre-XP systems?
  // Index of each function in function_info_.
  enum {
    kHeapAlloc, kHeapFree, kVirtualAllocEx, kVirtualFreeEx,
    kMapViewOfFileEx, kUnmapViewOfFile, kLoadLibraryExW, kFreeLibrary,
    kNumFunctions
  };

  struct FunctionInfo {
    const char* const name;          // name of fn in a module (eg "malloc")
    GenericFnPtr windows_fn;         // the fn whose name we call (&malloc)
    GenericFnPtr origstub_fn;        // original fn contents after we patch
    const GenericFnPtr perftools_fn; // fn we want to patch in
  };

  // One entry per enum value above, in the same order.
  static FunctionInfo function_info_[kNumFunctions];

  // A Windows-API equivalent of malloc and free
  static LPVOID WINAPI Perftools_HeapAlloc(HANDLE hHeap, DWORD dwFlags,
                                           DWORD_PTR dwBytes);
  static BOOL WINAPI Perftools_HeapFree(HANDLE hHeap, DWORD dwFlags,
                                        LPVOID lpMem);
  // A Windows-API equivalent of mmap and munmap, for "anonymous regions"
  static LPVOID WINAPI Perftools_VirtualAllocEx(HANDLE process, LPVOID address,
                                                SIZE_T size, DWORD type,
                                                DWORD protect);
  static BOOL WINAPI Perftools_VirtualFreeEx(HANDLE process, LPVOID address,
                                             SIZE_T size, DWORD type);
  // A Windows-API equivalent of mmap and munmap, for actual files
  static LPVOID WINAPI Perftools_MapViewOfFileEx(HANDLE hFileMappingObject,
                                                 DWORD dwDesiredAccess,
                                                 DWORD dwFileOffsetHigh,
                                                 DWORD dwFileOffsetLow,
                                                 SIZE_T dwNumberOfBytesToMap,
                                                 LPVOID lpBaseAddress);
  static BOOL WINAPI Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress);
  // We don't need the other 3 variants because they all call this one.
  static HMODULE WINAPI Perftools_LoadLibraryExW(LPCWSTR lpFileName,
                                                 HANDLE hFile,
                                                 DWORD dwFlags);
  static BOOL WINAPI Perftools_FreeLibrary(HMODULE hLibModule);
};
| 391 | |
// If you run out, just add a few more to the array.  You'll also need
// to update the switch statement in PatchOneModule(), and the list in
// UnpatchWindowsFunctions().
// main_executable and main_executable_windows are two windows into
// the same executable.  One is responsible for patching the libc
// routines that live in the main executable (if any) to use tcmalloc;
// the other is responsible for patching the windows routines like
// HeapAlloc/etc to use tcmalloc.
//
// Each libcN below is a distinct template instantiation, so each one
// is its own type with its own static members.
static LibcInfoWithPatchFunctions<0> main_executable;
static LibcInfoWithPatchFunctions<1> libc1;
static LibcInfoWithPatchFunctions<2> libc2;
static LibcInfoWithPatchFunctions<3> libc3;
static LibcInfoWithPatchFunctions<4> libc4;
static LibcInfoWithPatchFunctions<5> libc5;
static LibcInfoWithPatchFunctions<6> libc6;
static LibcInfoWithPatchFunctions<7> libc7;
static LibcInfoWithPatchFunctions<8> libc8;
// The per-module slots, viewed through their common base class so they
// can be iterated.  main_executable is deliberately not in this list.
static LibcInfo* g_module_libcs[] = {
  &libc1, &libc2, &libc3, &libc4, &libc5, &libc6, &libc7, &libc8
};
static WindowsInfo main_executable_windows;
| 413 | |
// Export names to look up in each module, indexed by the enum in
// LibcInfo (kMalloc ... kFreeDbg).  The entries must stay in enum order.
const char* const LibcInfo::function_name_[] = {
  "malloc", "free", "realloc", "calloc",
  kMangledNew, kMangledNewArray, kMangledDelete, kMangledDeleteArray,
  // Ideally we should patch the nothrow versions of new/delete, but
  // at least in msvcrt, nothrow-new machine-code is of a type we
  // can't patch.  Since these are relatively rare, I'm hoping it's ok
  // not to patch them.  (NULL name turns off patching.)
  NULL,  // kMangledNewNothrow,
  NULL,  // kMangledNewArrayNothrow,
  NULL,  // kMangledDeleteNothrow,
  NULL,  // kMangledDeleteArrayNothrow,
  // k_Msize, k_Expand, k_CallocCrt, kFreeBase, kFreeDbg
  "_msize", "_expand", "_calloc_crt", "_free_base", "_free_dbg"
};
| 427 | |
| 428 | // For mingw, I can't patch the new/delete here, because the |
| 429 | // instructions are too small to patch. Luckily, they're so small |
| 430 | // because all they do is call into malloc/free, so they still end up |
| 431 | // calling tcmalloc routines, and we don't actually lose anything |
| 432 | // (except maybe some stacktrace goodness) by not patching. |
// Addresses of the allocation routines as linked into this binary,
// indexed by the enum in LibcInfo.  The entries must stay in enum order.
const GenericFnPtr LibcInfo::static_fn_[] = {
  (GenericFnPtr)&::malloc,
  (GenericFnPtr)&::free,
  (GenericFnPtr)&::realloc,
  (GenericFnPtr)&::calloc,
#ifdef __MINGW32__
  // The eight operator new/delete slots (kNew ... kDeleteArrayNothrow).
  NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
#else
  // The function-pointer casts select a specific overload of each
  // operator before it is erased to GenericFnPtr.
  (GenericFnPtr)(void*(*)(size_t))&::operator new,
  (GenericFnPtr)(void*(*)(size_t))&::operator new[],
  (GenericFnPtr)(void(*)(void*))&::operator delete,
  (GenericFnPtr)(void(*)(void*))&::operator delete[],
  (GenericFnPtr)
  (void*(*)(size_t, struct std::nothrow_t const &))&::operator new,
  (GenericFnPtr)
  (void*(*)(size_t, struct std::nothrow_t const &))&::operator new[],
  (GenericFnPtr)
  (void(*)(void*, struct std::nothrow_t const &))&::operator delete,
  (GenericFnPtr)
  (void(*)(void*, struct std::nothrow_t const &))&::operator delete[],
#endif
  (GenericFnPtr)&::_msize,
  (GenericFnPtr)&::_expand,
  // k_CallocCrt, kFreeBase and kFreeDbg reuse calloc/free here, so these
  // slots repeat the addresses stored under kCalloc and kFree above.
  (GenericFnPtr)&::calloc,
  (GenericFnPtr)&::free,
  (GenericFnPtr)&::free
};
| 460 | |
// Definition of the per-instantiation stub table.  The empty
// initializer leaves every slot NULL.
template<int T> GenericFnPtr LibcInfoWithPatchFunctions<T>::origstub_fn_[] = {
  // This will get filled in at run-time, as patching is done.
};
| 464 | |
// The replacement routine for each slot, indexed by the enum in
// LibcInfo.  The entries must stay in enum order.
template<int T>
const GenericFnPtr LibcInfoWithPatchFunctions<T>::perftools_fn_[] = {
  (GenericFnPtr)&Perftools_malloc,
  (GenericFnPtr)&Perftools_free,
  (GenericFnPtr)&Perftools_realloc,
  (GenericFnPtr)&Perftools_calloc,
  (GenericFnPtr)&Perftools_new,
  (GenericFnPtr)&Perftools_newarray,
  (GenericFnPtr)&Perftools_delete,
  (GenericFnPtr)&Perftools_deletearray,
  (GenericFnPtr)&Perftools_new_nothrow,
  (GenericFnPtr)&Perftools_newarray_nothrow,
  (GenericFnPtr)&Perftools_delete_nothrow,
  (GenericFnPtr)&Perftools_deletearray_nothrow,
  (GenericFnPtr)&Perftools__msize,
  (GenericFnPtr)&Perftools__expand,
  (GenericFnPtr)&Perftools_calloc,      // k_CallocCrt shares the calloc replacement
  (GenericFnPtr)&Perftools_free_base,
  (GenericFnPtr)&Perftools_free_dbg
};
| 485 | |
// One row per entry of the enum in WindowsInfo, in enum order.  The
// columns are { name, windows_fn, origstub_fn, perftools_fn }; the two
// middle ones start out NULL and are filled in by WindowsInfo::Patch().
/*static*/ WindowsInfo::FunctionInfo WindowsInfo::function_info_[] = {
  { "HeapAlloc", NULL, NULL, (GenericFnPtr)&Perftools_HeapAlloc },
  { "HeapFree", NULL, NULL, (GenericFnPtr)&Perftools_HeapFree },
  { "VirtualAllocEx", NULL, NULL, (GenericFnPtr)&Perftools_VirtualAllocEx },
  { "VirtualFreeEx", NULL, NULL, (GenericFnPtr)&Perftools_VirtualFreeEx },
  { "MapViewOfFileEx", NULL, NULL, (GenericFnPtr)&Perftools_MapViewOfFileEx },
  { "UnmapViewOfFile", NULL, NULL, (GenericFnPtr)&Perftools_UnmapViewOfFile },
  { "LoadLibraryExW", NULL, NULL, (GenericFnPtr)&Perftools_LoadLibraryExW },
  { "FreeLibrary", NULL, NULL, (GenericFnPtr)&Perftools_FreeLibrary },
};
| 496 | |
| 497 | bool LibcInfo::PopulateWindowsFn(const ModuleEntryCopy& module_entry) { |
| 498 | // First, store the location of the function to patch before |
| 499 | // patching it. If none of these functions are found in the module, |
| 500 | // then this module has no libc in it, and we just return false. |
| 501 | for (int i = 0; i < kNumFunctions; i++) { |
| 502 | if (!function_name_[i]) // we can turn off patching by unsetting name |
| 503 | continue; |
| 504 | // The ::GetProcAddress calls were done in the ModuleEntryCopy |
| 505 | // constructor, so we don't have to make any windows calls here. |
| 506 | const GenericFnPtr fn = module_entry.rgProcAddresses[i]; |
| 507 | if (fn) { |
| 508 | windows_fn_[i] = PreamblePatcher::ResolveTarget(fn); |
| 509 | } |
| 510 | } |
| 511 | |
| 512 | // Some modules use the same function pointer for new and new[]. If |
| 513 | // we find that, set one of the pointers to NULL so we don't double- |
| 514 | // patch. Same may happen with new and nothrow-new, or even new[] |
| 515 | // and nothrow-new. It's easiest just to check each fn-ptr against |
| 516 | // every other. |
| 517 | for (int i = 0; i < kNumFunctions; i++) { |
| 518 | for (int j = i+1; j < kNumFunctions; j++) { |
| 519 | if (windows_fn_[i] == windows_fn_[j]) { |
| 520 | // We NULL the later one (j), so as to minimize the chances we |
| 521 | // NULL kFree and kRealloc. See comments below. This is fragile! |
| 522 | windows_fn_[j] = NULL; |
| 523 | } |
| 524 | } |
| 525 | } |
| 526 | |
| 527 | // There's always a chance that our module uses the same function |
| 528 | // as another module that we've already loaded. In that case, we |
| 529 | // need to set our windows_fn to NULL, to avoid double-patching. |
| 530 | for (int ifn = 0; ifn < kNumFunctions; ifn++) { |
| 531 | for (int imod = 0; |
| 532 | imod < sizeof(g_module_libcs)/sizeof(*g_module_libcs); imod++) { |
| 533 | if (g_module_libcs[imod]->is_valid() && |
| 534 | this->windows_fn(ifn) == g_module_libcs[imod]->windows_fn(ifn)) { |
| 535 | windows_fn_[ifn] = NULL; |
| 536 | } |
| 537 | } |
| 538 | } |
| 539 | |
| 540 | bool found_non_null = false; |
| 541 | for (int i = 0; i < kNumFunctions; i++) { |
| 542 | if (windows_fn_[i]) |
| 543 | found_non_null = true; |
| 544 | } |
| 545 | if (!found_non_null) |
| 546 | return false; |
| 547 | |
| 548 | // It's important we didn't NULL out windows_fn_[kFree] or [kRealloc]. |
| 549 | // The reason is, if those are NULL-ed out, we'll never patch them |
| 550 | // and thus never get an origstub_fn_ value for them, and when we |
| 551 | // try to call origstub_fn_[kFree/kRealloc] in Perftools_free and |
| 552 | // Perftools_realloc, below, it will fail. We could work around |
| 553 | // that by adding a pointer from one patch-unit to the other, but we |
| 554 | // haven't needed to yet. |
| 555 | CHECK(windows_fn_[kFree]); |
| 556 | CHECK(windows_fn_[kRealloc]); |
| 557 | |
| 558 | // OK, we successfully populated. Let's store our member information. |
| 559 | module_base_address_ = module_entry.modBaseAddr; |
| 560 | module_base_size_ = module_entry.modBaseSize; |
| 561 | return true; |
| 562 | } |
| 563 | |
| 564 | template<int T> |
| 565 | bool LibcInfoWithPatchFunctions<T>::Patch(const LibcInfo& me_info) { |
| 566 | CopyFrom(me_info); // copies the module_entry and the windows_fn_ array |
| 567 | for (int i = 0; i < kNumFunctions; i++) { |
| 568 | if (windows_fn_[i] && windows_fn_[i] != perftools_fn_[i]) { |
| 569 | // if origstub_fn_ is not NULL, it's left around from a previous |
| 570 | // patch. We need to set it to NULL for the new Patch call. |
| 571 | // |
| 572 | // Note that origstub_fn_ was logically freed by |
| 573 | // PreamblePatcher::Unpatch, so we don't have to do anything |
| 574 | // about it. |
| 575 | origstub_fn_[i] = NULL; // Patch() will fill this in |
| 576 | CHECK_EQ(sidestep::SIDESTEP_SUCCESS, |
| 577 | PreamblePatcher::Patch(windows_fn_[i], perftools_fn_[i], |
| 578 | &origstub_fn_[i])); |
| 579 | } |
| 580 | } |
| 581 | set_is_valid(true); |
| 582 | return true; |
| 583 | } |
| 584 | |
| 585 | template<int T> |
| 586 | void LibcInfoWithPatchFunctions<T>::Unpatch() { |
| 587 | // We have to cast our GenericFnPtrs to void* for unpatch. This is |
| 588 | // contra the C++ spec; we use C-style casts to empahsize that. |
| 589 | for (int i = 0; i < kNumFunctions; i++) { |
| 590 | if (windows_fn_[i]) |
| 591 | CHECK_EQ(sidestep::SIDESTEP_SUCCESS, |
| 592 | PreamblePatcher::Unpatch((void*)windows_fn_[i], |
| 593 | (void*)perftools_fn_[i], |
| 594 | (void*)origstub_fn_[i])); |
| 595 | } |
| 596 | set_is_valid(false); |
| 597 | } |
| 598 | |
| 599 | void WindowsInfo::Patch() { |
| 600 | HMODULE hkernel32 = ::GetModuleHandleA("kernel32"); |
| 601 | CHECK_NE(hkernel32, NULL); |
| 602 | |
| 603 | // Unlike for libc, we know these exist in our module, so we can get |
| 604 | // and patch at the same time. |
| 605 | for (int i = 0; i < kNumFunctions; i++) { |
| 606 | function_info_[i].windows_fn = (GenericFnPtr) |
| 607 | ::GetProcAddress(hkernel32, function_info_[i].name); |
| 608 | // If origstub_fn is not NULL, it's left around from a previous |
| 609 | // patch. We need to set it to NULL for the new Patch call. |
| 610 | // Since we've patched Unpatch() not to delete origstub_fn_ (it |
| 611 | // causes problems in some contexts, though obviously not this |
| 612 | // one), we should delete it now, before setting it to NULL. |
| 613 | // NOTE: casting from a function to a pointer is contra the C++ |
| 614 | // spec. It's not safe on IA64, but is on i386. We use |
| 615 | // a C-style cast here to emphasize this is not legal C++. |
| 616 | delete[] (char*)(function_info_[i].origstub_fn); |
| 617 | function_info_[i].origstub_fn = NULL; // Patch() will fill this in |
| 618 | CHECK_EQ(sidestep::SIDESTEP_SUCCESS, |
| 619 | PreamblePatcher::Patch(function_info_[i].windows_fn, |
| 620 | function_info_[i].perftools_fn, |
| 621 | &function_info_[i].origstub_fn)); |
| 622 | } |
| 623 | } |
| 624 | |
| 625 | void WindowsInfo::Unpatch() { |
| 626 | // We have to cast our GenericFnPtrs to void* for unpatch. This is |
| 627 | // contra the C++ spec; we use C-style casts to empahsize that. |
| 628 | for (int i = 0; i < kNumFunctions; i++) { |
| 629 | CHECK_EQ(sidestep::SIDESTEP_SUCCESS, |
| 630 | PreamblePatcher::Unpatch((void*)function_info_[i].windows_fn, |
| 631 | (void*)function_info_[i].perftools_fn, |
| 632 | (void*)function_info_[i].origstub_fn)); |
| 633 | } |
| 634 | } |
| 635 | |
| 636 | // You should hold the patch_all_modules_lock when calling this. |
| 637 | void PatchOneModuleLocked(const LibcInfo& me_info) { |
| 638 | // If we don't already have info on this module, let's add it. This |
| 639 | // is where we're sad that each libcX has a different type, so we |
| 640 | // can't use an array; instead, we have to use a switch statement. |
| 641 | // Patch() returns false if there were no libc functions in the module. |
| 642 | for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) { |
| 643 | if (!g_module_libcs[i]->is_valid()) { // found an empty spot to add! |
| 644 | switch (i) { |
| 645 | case 0: libc1.Patch(me_info); return; |
| 646 | case 1: libc2.Patch(me_info); return; |
| 647 | case 2: libc3.Patch(me_info); return; |
| 648 | case 3: libc4.Patch(me_info); return; |
| 649 | case 4: libc5.Patch(me_info); return; |
| 650 | case 5: libc6.Patch(me_info); return; |
| 651 | case 6: libc7.Patch(me_info); return; |
| 652 | case 7: libc8.Patch(me_info); return; |
| 653 | } |
| 654 | } |
| 655 | } |
| 656 | printf("PERFTOOLS ERROR: Too many modules containing libc in this executable\n"); |
| 657 | } |
| 658 | |
| 659 | void PatchMainExecutableLocked() { |
| 660 | if (main_executable.patched()) |
| 661 | return; // main executable has already been patched |
| 662 | ModuleEntryCopy fake_module_entry; // make a fake one to pass into Patch() |
| 663 | // No need to call PopulateModuleEntryProcAddresses on the main executable. |
| 664 | main_executable.PopulateWindowsFn(fake_module_entry); |
| 665 | main_executable.Patch(main_executable); |
| 666 | } |
| 667 | |
| 668 | // This lock is subject to a subtle and annoying lock inversion |
| 669 | // problem: it may interact badly with unknown internal windows locks. |
| 670 | // In particular, windows may be holding a lock when it calls |
| 671 | // LoadLibraryExW and FreeLibrary, which we've patched. We have those |
| 672 | // routines call PatchAllModules, which acquires this lock. If we |
| 673 | // make windows system calls while holding this lock, those system |
| 674 | // calls may need the internal windows locks that are being held in |
| 675 | // the call to LoadLibraryExW, resulting in deadlock. The solution is |
| 676 | // to be very careful not to call *any* windows routines while holding |
| 677 | // patch_all_modules_lock, inside PatchAllModules(). |
| 678 | static SpinLock patch_all_modules_lock(SpinLock::LINKER_INITIALIZED); |
| 679 | |
| 680 | // last_loaded: The set of modules that were loaded the last time |
| 681 | // PatchAllModules was called. This is an optimization for only |
| 682 | // looking at modules that were added or removed from the last call. |
| 683 | static std::set<HMODULE> *g_last_loaded; |
| 684 | |
| 685 | // Iterates over all the modules currently loaded by the executable, |
| 686 | // according to windows, and makes sure they're all patched. Most |
| 687 | // modules will already be in loaded_modules, meaning we have already |
| 688 | // loaded and either patched them or determined they did not need to |
| 689 | // be patched. Others will not, which means we need to patch them |
| 690 | // (if necessary). Finally, we have to go through the existing |
| 691 | // g_module_libcs and see if any of those are *not* in the modules |
| 692 | // currently loaded by the executable. If so, we need to invalidate |
| 693 | // them. Returns true if we did any work (patching or invalidating), |
| 694 | // false if we were a noop. May update loaded_modules as well. |
| 695 | // NOTE: you must hold the patch_all_modules_lock to access loaded_modules. |
| 696 | bool PatchAllModules() { |
| 697 | std::vector<ModuleEntryCopy> modules; |
| 698 | bool made_changes = false; |
| 699 | |
| 700 | const HANDLE hCurrentProcess = GetCurrentProcess(); |
| 701 | DWORD num_modules = 0; |
| 702 | HMODULE hModules[kMaxModules]; // max # of modules we support in one process |
| 703 | if (!::EnumProcessModules(hCurrentProcess, hModules, sizeof(hModules), |
| 704 | &num_modules)) { |
| 705 | num_modules = 0; |
| 706 | } |
| 707 | // EnumProcessModules actually set the bytes written into hModules, |
| 708 | // so we need to divide to make num_modules actually be a module-count. |
| 709 | num_modules /= sizeof(*hModules); |
| 710 | if (num_modules >= kMaxModules) { |
| 711 | printf("PERFTOOLS ERROR: Too many modules in this executable to try" |
| 712 | " to patch them all (if you need to, raise kMaxModules in" |
| 713 | " patch_functions.cc).\n"); |
| 714 | num_modules = kMaxModules; |
| 715 | } |
| 716 | |
| 717 | // Now we handle the unpatching of modules we have in g_module_libcs |
| 718 | // but that were not found in EnumProcessModules. We need to |
| 719 | // invalidate them. To speed that up, we store the EnumProcessModules |
| 720 | // output in a set. |
| 721 | // At the same time, we prepare for the adding of new modules, by |
| 722 | // removing from hModules all the modules we know we've already |
| 723 | // patched (or decided don't need to be patched). At the end, |
| 724 | // hModules will hold only the modules that we need to consider patching. |
| 725 | std::set<HMODULE> currently_loaded_modules; |
| 726 | { |
| 727 | SpinLockHolder h(&patch_all_modules_lock); |
| 728 | if (!g_last_loaded) g_last_loaded = new std::set<HMODULE>; |
| 729 | // At the end of this loop, currently_loaded_modules contains the |
| 730 | // full list of EnumProcessModules, and hModules just the ones we |
| 731 | // haven't handled yet. |
| 732 | for (int i = 0; i < num_modules; ) { |
| 733 | currently_loaded_modules.insert(hModules[i]); |
| 734 | if (g_last_loaded->count(hModules[i]) > 0) { |
| 735 | hModules[i] = hModules[--num_modules]; // replace element i with tail |
| 736 | } else { |
| 737 | i++; // keep element i |
| 738 | } |
| 739 | } |
| 740 | // Now we do the unpatching/invalidation. |
| 741 | for (int i = 0; i < sizeof(g_module_libcs)/sizeof(*g_module_libcs); i++) { |
| 742 | if (g_module_libcs[i]->patched() && |
| 743 | currently_loaded_modules.count(g_module_libcs[i]->hmodule()) == 0) { |
| 744 | // Means g_module_libcs[i] is no longer loaded (no me32 matched). |
| 745 | // We could call Unpatch() here, but why bother? The module |
| 746 | // has gone away, so nobody is going to call into it anyway. |
| 747 | g_module_libcs[i]->set_is_valid(false); |
| 748 | made_changes = true; |
| 749 | } |
| 750 | } |
| 751 | // Update the loaded module cache. |
| 752 | g_last_loaded->swap(currently_loaded_modules); |
| 753 | } |
| 754 | |
| 755 | // Now that we know what modules are new, let's get the info we'll |
| 756 | // need to patch them. Note this *cannot* be done while holding the |
| 757 | // lock, since it needs to make windows calls (see the lock-inversion |
| 758 | // comments before the definition of patch_all_modules_lock). |
| 759 | MODULEINFO mi; |
| 760 | for (int i = 0; i < num_modules; i++) { |
| 761 | if (::GetModuleInformation(hCurrentProcess, hModules[i], &mi, sizeof(mi))) |
| 762 | modules.push_back(ModuleEntryCopy(mi)); |
| 763 | } |
| 764 | |
| 765 | // Now we can do the patching of new modules. |
| 766 | { |
| 767 | SpinLockHolder h(&patch_all_modules_lock); |
| 768 | for (std::vector<ModuleEntryCopy>::iterator it = modules.begin(); |
| 769 | it != modules.end(); ++it) { |
| 770 | LibcInfo libc_info; |
| 771 | if (libc_info.PopulateWindowsFn(*it)) { // true==module has libc routines |
| 772 | PatchOneModuleLocked(libc_info); |
| 773 | made_changes = true; |
| 774 | } |
| 775 | } |
| 776 | |
| 777 | // Now that we've dealt with the modules (dlls), update the main |
| 778 | // executable. We do this last because PatchMainExecutableLocked |
| 779 | // wants to look at how other modules were patched. |
| 780 | if (!main_executable.patched()) { |
| 781 | PatchMainExecutableLocked(); |
| 782 | made_changes = true; |
| 783 | } |
| 784 | } |
| 785 | // TODO(csilvers): for this to be reliable, we need to also take |
| 786 | // into account if we *would* have patched any modules had they not |
| 787 | // already been loaded. (That is, made_changes should ignore |
| 788 | // g_last_loaded.) |
| 789 | return made_changes; |
| 790 | } |
| 791 | |
| 792 | |
| 793 | } // end unnamed namespace |
| 794 | |
| 795 | // --------------------------------------------------------------------- |
| 796 | // Now that we've done all the patching machinery, let's actually |
| 797 | // define the functions we're patching in. Mostly these are |
| 798 | // simple wrappers around the do_* routines in tcmalloc.cc. |
| 799 | // |
| 800 | // In fact, we #include tcmalloc.cc to get at the tcmalloc internal |
| 801 | // do_* functions, the better to write our own hook functions. |
| 802 | // U-G-L-Y, I know. But the alternatives are, perhaps, worse. This |
| 803 | // also lets us define _msize(), _expand(), and other windows-specific |
| 804 | // functions here, using tcmalloc internals, without polluting |
| 805 | // tcmalloc.cc. |
| 806 | // ------------------------------------------------------------------- |
| 807 | |
| 808 | // TODO(csilvers): refactor tcmalloc.cc into two files, so I can link |
| 809 | // against the file with do_malloc, and ignore the one with malloc. |
| 810 | #include "tcmalloc.cc" |
| 811 | |
| 812 | template<int T> |
| 813 | void* LibcInfoWithPatchFunctions<T>::Perftools_malloc(size_t size) __THROW { |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame^] | 814 | return malloc_fast_path<tcmalloc::malloc_oom>(size); |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 815 | } |
| 816 | |
| 817 | template<int T> |
| 818 | void LibcInfoWithPatchFunctions<T>::Perftools_free(void* ptr) __THROW { |
| 819 | MallocHook::InvokeDeleteHook(ptr); |
| 820 | // This calls the windows free if do_free decides ptr was not |
| 821 | // allocated by tcmalloc. Note it calls the origstub_free from |
| 822 | // *this* templatized instance of LibcInfo. See "template |
| 823 | // trickiness" above. |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame^] | 824 | do_free_with_callback(ptr, (void (*)(void*))origstub_fn_[kFree], false, 0); |
| 825 | } |
| 826 | |
| 827 | template<int T> |
| 828 | void LibcInfoWithPatchFunctions<T>::Perftools_free_base(void* ptr) __THROW{ |
| 829 | MallocHook::InvokeDeleteHook(ptr); |
| 830 | // This calls the windows free if do_free decides ptr was not |
| 831 | // allocated by tcmalloc. Note it calls the origstub_free from |
| 832 | // *this* templatized instance of LibcInfo. See "template |
| 833 | // trickiness" above. |
| 834 | do_free_with_callback(ptr, (void(*)(void*))origstub_fn_[kFreeBase], false, 0); |
| 835 | } |
| 836 | |
| 837 | template<int T> |
| 838 | void LibcInfoWithPatchFunctions<T>::Perftools_free_dbg(void* ptr, int block_use) __THROW { |
| 839 | MallocHook::InvokeDeleteHook(ptr); |
| 840 | // The windows _free_dbg is called if ptr isn't owned by tcmalloc. |
| 841 | if (MallocExtension::instance()->GetOwnership(ptr) == MallocExtension::kOwned) { |
| 842 | do_free(ptr); |
| 843 | } else { |
| 844 | reinterpret_cast<void (*)(void*, int)>(origstub_fn_[kFreeDbg])(ptr, block_use); |
| 845 | } |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 846 | } |
| 847 | |
| 848 | template<int T> |
| 849 | void* LibcInfoWithPatchFunctions<T>::Perftools_realloc( |
| 850 | void* old_ptr, size_t new_size) __THROW { |
| 851 | if (old_ptr == NULL) { |
| 852 | void* result = do_malloc_or_cpp_alloc(new_size); |
| 853 | MallocHook::InvokeNewHook(result, new_size); |
| 854 | return result; |
| 855 | } |
| 856 | if (new_size == 0) { |
| 857 | MallocHook::InvokeDeleteHook(old_ptr); |
| 858 | do_free_with_callback(old_ptr, |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame^] | 859 | (void (*)(void*))origstub_fn_[kFree], false, 0); |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 860 | return NULL; |
| 861 | } |
| 862 | return do_realloc_with_callback( |
| 863 | old_ptr, new_size, |
| 864 | (void (*)(void*))origstub_fn_[kFree], |
| 865 | (size_t (*)(const void*))origstub_fn_[k_Msize]); |
| 866 | } |
| 867 | |
| 868 | template<int T> |
| 869 | void* LibcInfoWithPatchFunctions<T>::Perftools_calloc( |
| 870 | size_t n, size_t elem_size) __THROW { |
| 871 | void* result = do_calloc(n, elem_size); |
| 872 | MallocHook::InvokeNewHook(result, n * elem_size); |
| 873 | return result; |
| 874 | } |
| 875 | |
| 876 | template<int T> |
| 877 | void* LibcInfoWithPatchFunctions<T>::Perftools_new(size_t size) { |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame^] | 878 | return malloc_fast_path<tcmalloc::cpp_throw_oom>(size); |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 879 | } |
| 880 | |
| 881 | template<int T> |
| 882 | void* LibcInfoWithPatchFunctions<T>::Perftools_newarray(size_t size) { |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame^] | 883 | return malloc_fast_path<tcmalloc::cpp_throw_oom>(size); |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 884 | } |
| 885 | |
| 886 | template<int T> |
| 887 | void LibcInfoWithPatchFunctions<T>::Perftools_delete(void *p) { |
| 888 | MallocHook::InvokeDeleteHook(p); |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame^] | 889 | do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree], false, 0); |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 890 | } |
| 891 | |
| 892 | template<int T> |
| 893 | void LibcInfoWithPatchFunctions<T>::Perftools_deletearray(void *p) { |
| 894 | MallocHook::InvokeDeleteHook(p); |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame^] | 895 | do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree], false, 0); |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 896 | } |
| 897 | |
| 898 | template<int T> |
| 899 | void* LibcInfoWithPatchFunctions<T>::Perftools_new_nothrow( |
| 900 | size_t size, const std::nothrow_t&) __THROW { |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame^] | 901 | return malloc_fast_path<tcmalloc::cpp_nothrow_oom>(size); |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 902 | } |
| 903 | |
| 904 | template<int T> |
| 905 | void* LibcInfoWithPatchFunctions<T>::Perftools_newarray_nothrow( |
| 906 | size_t size, const std::nothrow_t&) __THROW { |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame^] | 907 | return malloc_fast_path<tcmalloc::cpp_nothrow_oom>(size); |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 908 | } |
| 909 | |
| 910 | template<int T> |
| 911 | void LibcInfoWithPatchFunctions<T>::Perftools_delete_nothrow( |
| 912 | void *p, const std::nothrow_t&) __THROW { |
| 913 | MallocHook::InvokeDeleteHook(p); |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame^] | 914 | do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree], false, 0); |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 915 | } |
| 916 | |
| 917 | template<int T> |
| 918 | void LibcInfoWithPatchFunctions<T>::Perftools_deletearray_nothrow( |
| 919 | void *p, const std::nothrow_t&) __THROW { |
| 920 | MallocHook::InvokeDeleteHook(p); |
Brian Silverman | 20350ac | 2021-11-17 18:19:55 -0800 | [diff] [blame^] | 921 | do_free_with_callback(p, (void (*)(void*))origstub_fn_[kFree], false, 0); |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 922 | } |
| 923 | |
| 924 | |
| 925 | // _msize() lets you figure out how much space is reserved for a |
| 926 | // pointer, in Windows. Even if applications don't call it, any DLL |
| 927 | // with global constructors will call (transitively) something called |
| 928 | // __dllonexit_lk in order to make sure the destructors get called |
| 929 | // when the dll unloads. And that will call msize -- horrible things |
| 930 | // can ensue if this is not hooked. Other parts of libc may also call |
| 931 | // this internally. |
| 932 | |
| 933 | template<int T> |
| 934 | size_t LibcInfoWithPatchFunctions<T>::Perftools__msize(void* ptr) __THROW { |
| 935 | return GetSizeWithCallback(ptr, (size_t (*)(const void*))origstub_fn_[k_Msize]); |
| 936 | } |
| 937 | |
| 938 | // We need to define this because internal windows functions like to |
| 939 | // call into it(?). _expand() is like realloc but doesn't move the |
| 940 | // pointer. We punt, which will cause callers to fall back on realloc. |
| 941 | template<int T> |
| 942 | void* LibcInfoWithPatchFunctions<T>::Perftools__expand(void *ptr, |
| 943 | size_t size) __THROW { |
| 944 | return NULL; |
| 945 | } |
| 946 | |
| 947 | LPVOID WINAPI WindowsInfo::Perftools_HeapAlloc(HANDLE hHeap, DWORD dwFlags, |
| 948 | DWORD_PTR dwBytes) { |
| 949 | LPVOID result = ((LPVOID (WINAPI *)(HANDLE, DWORD, DWORD_PTR)) |
| 950 | function_info_[kHeapAlloc].origstub_fn)( |
| 951 | hHeap, dwFlags, dwBytes); |
| 952 | MallocHook::InvokeNewHook(result, dwBytes); |
| 953 | return result; |
| 954 | } |
| 955 | |
| 956 | BOOL WINAPI WindowsInfo::Perftools_HeapFree(HANDLE hHeap, DWORD dwFlags, |
| 957 | LPVOID lpMem) { |
| 958 | MallocHook::InvokeDeleteHook(lpMem); |
| 959 | return ((BOOL (WINAPI *)(HANDLE, DWORD, LPVOID)) |
| 960 | function_info_[kHeapFree].origstub_fn)( |
| 961 | hHeap, dwFlags, lpMem); |
| 962 | } |
| 963 | |
| 964 | LPVOID WINAPI WindowsInfo::Perftools_VirtualAllocEx(HANDLE process, |
| 965 | LPVOID address, |
| 966 | SIZE_T size, DWORD type, |
| 967 | DWORD protect) { |
| 968 | LPVOID result = ((LPVOID (WINAPI *)(HANDLE, LPVOID, SIZE_T, DWORD, DWORD)) |
| 969 | function_info_[kVirtualAllocEx].origstub_fn)( |
| 970 | process, address, size, type, protect); |
| 971 | // VirtualAllocEx() seems to be the Windows equivalent of mmap() |
| 972 | MallocHook::InvokeMmapHook(result, address, size, protect, type, -1, 0); |
| 973 | return result; |
| 974 | } |
| 975 | |
| 976 | BOOL WINAPI WindowsInfo::Perftools_VirtualFreeEx(HANDLE process, LPVOID address, |
| 977 | SIZE_T size, DWORD type) { |
| 978 | MallocHook::InvokeMunmapHook(address, size); |
| 979 | return ((BOOL (WINAPI *)(HANDLE, LPVOID, SIZE_T, DWORD)) |
| 980 | function_info_[kVirtualFreeEx].origstub_fn)( |
| 981 | process, address, size, type); |
| 982 | } |
| 983 | |
| 984 | LPVOID WINAPI WindowsInfo::Perftools_MapViewOfFileEx( |
| 985 | HANDLE hFileMappingObject, DWORD dwDesiredAccess, DWORD dwFileOffsetHigh, |
| 986 | DWORD dwFileOffsetLow, SIZE_T dwNumberOfBytesToMap, LPVOID lpBaseAddress) { |
| 987 | // For this function pair, you always deallocate the full block of |
| 988 | // data that you allocate, so NewHook/DeleteHook is the right API. |
| 989 | LPVOID result = ((LPVOID (WINAPI *)(HANDLE, DWORD, DWORD, DWORD, |
| 990 | SIZE_T, LPVOID)) |
| 991 | function_info_[kMapViewOfFileEx].origstub_fn)( |
| 992 | hFileMappingObject, dwDesiredAccess, dwFileOffsetHigh, |
| 993 | dwFileOffsetLow, dwNumberOfBytesToMap, lpBaseAddress); |
| 994 | MallocHook::InvokeNewHook(result, dwNumberOfBytesToMap); |
| 995 | return result; |
| 996 | } |
| 997 | |
| 998 | BOOL WINAPI WindowsInfo::Perftools_UnmapViewOfFile(LPCVOID lpBaseAddress) { |
| 999 | MallocHook::InvokeDeleteHook(lpBaseAddress); |
| 1000 | return ((BOOL (WINAPI *)(LPCVOID)) |
| 1001 | function_info_[kUnmapViewOfFile].origstub_fn)( |
| 1002 | lpBaseAddress); |
| 1003 | } |
| 1004 | |
Austin Schuh | 745610d | 2015-09-06 18:19:50 -0700 | [diff] [blame] | 1005 | HMODULE WINAPI WindowsInfo::Perftools_LoadLibraryExW(LPCWSTR lpFileName, |
| 1006 | HANDLE hFile, |
| 1007 | DWORD dwFlags) { |
| 1008 | HMODULE rv; |
| 1009 | // Check to see if the modules is already loaded, flag 0 gets a |
| 1010 | // reference if it was loaded. If it was loaded no need to call |
| 1011 | // PatchAllModules, just increase the reference count to match |
| 1012 | // what GetModuleHandleExW does internally inside windows. |
| 1013 | if (::GetModuleHandleExW(0, lpFileName, &rv)) { |
| 1014 | return rv; |
| 1015 | } else { |
| 1016 | // Not already loaded, so load it. |
| 1017 | rv = ((HMODULE (WINAPI *)(LPCWSTR, HANDLE, DWORD)) |
| 1018 | function_info_[kLoadLibraryExW].origstub_fn)( |
| 1019 | lpFileName, hFile, dwFlags); |
| 1020 | // This will patch any newly loaded libraries, if patching needs |
| 1021 | // to be done. |
| 1022 | PatchAllModules(); |
| 1023 | |
| 1024 | return rv; |
| 1025 | } |
| 1026 | } |
| 1027 | |
| 1028 | BOOL WINAPI WindowsInfo::Perftools_FreeLibrary(HMODULE hLibModule) { |
| 1029 | BOOL rv = ((BOOL (WINAPI *)(HMODULE)) |
| 1030 | function_info_[kFreeLibrary].origstub_fn)(hLibModule); |
| 1031 | |
| 1032 | // Check to see if the module is still loaded by passing the base |
| 1033 | // address and seeing if it comes back with the same address. If it |
| 1034 | // is the same address it's still loaded, so the FreeLibrary() call |
| 1035 | // was a noop, and there's no need to redo the patching. |
| 1036 | HMODULE owner = NULL; |
| 1037 | BOOL result = ::GetModuleHandleExW( |
| 1038 | (GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | |
| 1039 | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT), |
| 1040 | (LPCWSTR)hLibModule, |
| 1041 | &owner); |
| 1042 | if (result && owner == hLibModule) |
| 1043 | return rv; |
| 1044 | |
| 1045 | PatchAllModules(); // this will fix up the list of patched libraries |
| 1046 | return rv; |
| 1047 | } |
| 1048 | |
| 1049 | |
| 1050 | // --------------------------------------------------------------------- |
| 1051 | // PatchWindowsFunctions() |
| 1052 | // This is the function that is exposed to the outside world. |
| 1053 | // It should be called before the program becomes multi-threaded, |
| 1054 | // since main_executable_windows.Patch() is not thread-safe. |
| 1055 | // --------------------------------------------------------------------- |
| 1056 | |
| 1057 | void PatchWindowsFunctions() { |
| 1058 | // This does the libc patching in every module, and the main executable. |
| 1059 | PatchAllModules(); |
| 1060 | main_executable_windows.Patch(); |
| 1061 | } |
| 1062 | |
| 1063 | #if 0 |
| 1064 | // It's possible to unpatch all the functions when we are exiting. |
| 1065 | |
| 1066 | // The idea is to handle properly windows-internal data that is |
| 1067 | // allocated before PatchWindowsFunctions is called. If all |
| 1068 | // destruction happened in reverse order from construction, then we |
| 1069 | // could call UnpatchWindowsFunctions at just the right time, so that |
| 1070 | // that early-allocated data would be freed using the windows |
| 1071 | // allocation functions rather than tcmalloc. The problem is that |
| 1072 | // windows allocates some structures lazily, so it would allocate them |
| 1073 | // late (using tcmalloc) and then try to deallocate them late as well. |
| 1074 | // So instead of unpatching, we just modify all the tcmalloc routines |
| 1075 | // so they call through to the libc routines if the memory in |
| 1076 | // question doesn't seem to have been allocated with tcmalloc. I keep |
| 1077 | // this unpatch code around for reference. |
| 1078 | |
| 1079 | void UnpatchWindowsFunctions() { |
| 1080 | // We need to go back to the system malloc/etc at global destruct time, |
| 1081 | // so objects that were constructed before tcmalloc, using the system |
| 1082 | // malloc, can destroy themselves using the system free. This depends |
| 1083 | // on DLLs unloading in the reverse order in which they load! |
| 1084 | // |
| 1085 | // We also go back to the default HeapAlloc/etc, just for consistency. |
| 1086 | // Who knows, it may help avoid weird bugs in some situations. |
| 1087 | main_executable_windows.Unpatch(); |
| 1088 | main_executable.Unpatch(); |
| 1089 | if (libc1.is_valid()) libc1.Unpatch(); |
| 1090 | if (libc2.is_valid()) libc2.Unpatch(); |
| 1091 | if (libc3.is_valid()) libc3.Unpatch(); |
| 1092 | if (libc4.is_valid()) libc4.Unpatch(); |
| 1093 | if (libc5.is_valid()) libc5.Unpatch(); |
| 1094 | if (libc6.is_valid()) libc6.Unpatch(); |
| 1095 | if (libc7.is_valid()) libc7.Unpatch(); |
| 1096 | if (libc8.is_valid()) libc8.Unpatch(); |
| 1097 | } |
| 1098 | #endif |