// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat <opensource@google.com>
//
// Common definitions for tcmalloc code.

#ifndef TCMALLOC_COMMON_H_
#define TCMALLOC_COMMON_H_

#include "config.h"
#include <stddef.h>            // for size_t
#ifdef HAVE_STDINT_H
#include <stdint.h>            // for uintptr_t, uint64_t
#endif
#include "internal_logging.h"  // for ASSERT, etc
#include "base/basictypes.h"   // for LIKELY, etc
// Type that can hold a page number
typedef uintptr_t PageID;

// Type that can hold the length of a run of pages
typedef uintptr_t Length;

//-------------------------------------------------------------------
// Configuration
//-------------------------------------------------------------------
#if defined(TCMALLOC_ALIGN_8BYTES)
// Unless we are forced to use 8-byte alignment, we use an alignment of
// at least 16 bytes to satisfy requirements for some SSE types.
// Keep in mind that with 16-byte alignment, space waste due to
// alignment can reach 25% (e.g. a malloc of 24 bytes will get 32 bytes).
static const size_t kMinAlign = 8;
#else
static const size_t kMinAlign = 16;
#endif
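
// A small illustrative sketch (not from the original header): the usual
// power-of-two rounding that produces the waste described above. With
// kMinAlign == 16, a 24-byte request rounds up to 32 bytes, i.e. 25% waste.
//
//   size_t RoundUpToMinAlign(size_t n) {  // hypothetical helper
//     return (n + kMinAlign - 1) & ~(kMinAlign - 1);
//   }
//   // RoundUpToMinAlign(24) == 32 when kMinAlign == 16.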

// Using large pages speeds up execution at the cost of larger memory use.
// Deallocation may speed up by a noticeable factor because the page map
// gets 8x smaller, so lookups in the page map cause fewer L2 cache misses;
// this translates to a speedup for application/platform combinations with
// high L2 cache pressure. As the number of size classes increases with
// large pages, we increase the thread cache allowance to avoid passing
// more free ranges to and from central lists. Also, larger pages are less
// likely to get freed. These two factors cause a bounded increase in
// memory use.
#if defined(TCMALLOC_PAGE_SIZE_SHIFT)
static const size_t kPageShift = TCMALLOC_PAGE_SIZE_SHIFT;
#else
static const size_t kPageShift = 13;
#endif

static const size_t kClassSizesMax = 128;

static const size_t kMaxThreadCacheSize = 4 << 20;

static const size_t kPageSize  = 1 << kPageShift;
static const size_t kMaxSize   = 256 * 1024;
static const size_t kAlignment = 8;

// For all span-lengths <= kMaxPages we keep an exact-size list in PageHeap.
static const size_t kMaxPages = 1 << (20 - kPageShift);
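
// Worked example with the defaults above: kPageShift == 13 gives
// kPageSize == 8192 (8KiB) and kMaxPages == 1 << (20 - 13) == 128,
// so PageHeap keeps exact-size lists for spans of up to 128 pages (1MiB).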

// Default bound on the total amount of thread caches.
#ifdef TCMALLOC_SMALL_BUT_SLOW
// Make the overall thread cache no bigger than that of a single thread
// for the small memory footprint case.
static const size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize;
#else
static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize;
#endif

// Lower bound on the per-thread cache sizes
static const size_t kMinThreadCacheSize = kMaxSize * 2;

// The number of bytes one ThreadCache will steal from another when
// the first ThreadCache is forced to Scavenge(), delaying the
// next call to Scavenge for this thread.
static const size_t kStealAmount = 1 << 16;

// The number of times that a deallocation can cause a freelist to
// go over its max_length() before shrinking max_length().
static const int kMaxOverages = 3;

// Maximum length we allow a per-thread free-list to have before we
// move objects from it into the corresponding central free-list.  We
// want this big to avoid locking the central free-list too often.  It
// should not hurt to make this list somewhat big because the
// scavenging code will shrink it down when its contents are not in use.
static const int kMaxDynamicFreeListLength = 8192;

static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;
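// For example, with a 64-bit Length and the default kPageShift of 13,
// kMaxValidPages == (2^64 - 1) >> 13 == 2^51 - 1.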

#if __aarch64__ || __x86_64__ || _M_AMD64 || _M_ARM64
// All current x86_64 processors only look at the lower 48 bits in
// virtual-to-physical address translation.  The top 16 bits are all the
// same as bit 47, and addresses with bit 47 set are in practice reserved
// for kernel space.  So malloc effectively sees 47 usable bits, which
// lets us use faster two-level page maps on this architecture.
//
// The story is very similar on 64-bit ARM, except that the full 48 bits
// are available to user space.  Because of that, and because in principle
// OSes can start giving some highest-bit-set addresses to user space,
// we don't bother to limit x86 to 47 bits.
//
// As of now there are published plans to add more bits to the x86-64
// virtual address space, but since 48 bits has been the norm for a long
// time and lots of software relies on it, the extension will be opt-in
// from the OS's perspective.  So we can keep using "48 bits", at least
// for now.
static const int kAddressBits = (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48);
#else
// MIPS and PPC have more general hardware, so we have to support the
// full 64 bits of address space.
static const int kAddressBits = 8 * sizeof(void*);
#endif
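
// Illustrative consequence of kAddressBits (a sketch, not original text):
// the page map only has to cover kAddressBits - kPageShift bits of page
// number.  With the defaults above (48 and 13) that is 35 bits, i.e. at
// most 2^35 distinct PageIDs, which is what makes the shallow two-level
// page maps mentioned above practical.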

namespace tcmalloc {

// Convert byte size into pages.  This won't overflow, but may return
// an unreasonably large value if bytes is huge enough.
inline Length pages(size_t bytes) {
  return (bytes >> kPageShift) +
      ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
}
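
// A few example values (illustrative, assuming the default 8KiB pages):
//   pages(0)    == 0
//   pages(1)    == 1   // any partial page rounds up
//   pages(8192) == 1
//   pages(8193) == 2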

// For larger allocation sizes, we use larger memory alignments to
// reduce the number of size classes.
int AlignmentForSize(size_t size);

// Size-class information + mapping
class SizeMap {
 private:
  //-------------------------------------------------------------------
  // Mapping from size to size_class and vice versa
  //-------------------------------------------------------------------

  // Sizes <= 1024 have an alignment >= 8.  So for such sizes we have an
  // array indexed by ceil(size/8).  Sizes > 1024 have an alignment >= 128.
  // So for these larger sizes we have an array indexed by ceil(size/128).
  //
  // We flatten both logical arrays into one physical array and use
  // arithmetic to compute an appropriate index.  The constants used by
  // ClassIndex() were selected to make the flattening work.
  //
  // Examples:
  //   Size       Expression                      Index
  //   -------------------------------------------------------
  //   0          (0 + 7) / 8                     0
  //   1          (1 + 7) / 8                     1
  //   ...
  //   1024       (1024 + 7) / 8                  128
  //   1025       (1025 + 127 + (120<<7)) / 128   129
  //   ...
  //   32768      (32768 + 127 + (120<<7)) / 128  376
  static const int kMaxSmallSize = 1024;
  static const size_t kClassArraySize =
      ((kMaxSize + 127 + (120 << 7)) >> 7) + 1;
  unsigned char class_array_[kClassArraySize];

  static inline size_t SmallSizeClass(size_t s) {
    return (static_cast<uint32_t>(s) + 7) >> 3;
  }

  static inline size_t LargeSizeClass(size_t s) {
    return (static_cast<uint32_t>(s) + 127 + (120 << 7)) >> 7;
  }

  // If size is no more than kMaxSize, compute the index of the
  // class_array[] entry for it, putting the class index in output
  // parameter idx and returning true.  Otherwise return false.
  static inline bool ATTRIBUTE_ALWAYS_INLINE ClassIndexMaybe(size_t s,
                                                             uint32* idx) {
    if (PREDICT_TRUE(s <= kMaxSmallSize)) {
      *idx = (static_cast<uint32>(s) + 7) >> 3;
      return true;
    } else if (s <= kMaxSize) {
      *idx = (static_cast<uint32>(s) + 127 + (120 << 7)) >> 7;
      return true;
    }
    return false;
  }

  // Compute the index of the class_array[] entry for a given size
  static inline size_t ClassIndex(size_t s) {
    // Use unsigned arithmetic to avoid unnecessary sign extensions.
    ASSERT(0 <= s);
    ASSERT(s <= kMaxSize);
    if (PREDICT_TRUE(s <= kMaxSmallSize)) {
      return SmallSizeClass(s);
    } else {
      return LargeSizeClass(s);
    }
  }

  // Number of objects to move between a per-thread list and a central
  // list in one shot.  We want this to be not too small so we can
  // amortize the lock overhead for accessing the central list.  Making
  // it too big may temporarily cause unnecessary memory wastage in the
  // per-thread free list until the scavenger cleans up the list.
  int num_objects_to_move_[kClassSizesMax];

  int NumMoveSize(size_t size);

  // Mapping from size class to max size storable in that class
  int32 class_to_size_[kClassSizesMax];

  // Mapping from size class to number of pages to allocate at a time
  size_t class_to_pages_[kClassSizesMax];

 public:
  size_t num_size_classes;

  // Constructor should do nothing since we rely on explicit Init()
  // call, which may or may not be called before the constructor runs.
  SizeMap() { }

  // Initialize the mapping arrays
  void Init();

  inline int SizeClass(size_t size) {
    return class_array_[ClassIndex(size)];
  }

  // Check if size is small enough to be representable by a size
  // class, and if it is, put the matching size class into *cl.
  // Returns true iff a matching size class was found.
  inline bool ATTRIBUTE_ALWAYS_INLINE GetSizeClass(size_t size, uint32* cl) {
    uint32 idx;
    if (!ClassIndexMaybe(size, &idx)) {
      return false;
    }
    *cl = class_array_[idx];
    return true;
  }

  // Get the byte-size for a specified class
  inline int32 ATTRIBUTE_ALWAYS_INLINE ByteSizeForClass(uint32 cl) {
    return class_to_size_[cl];
  }

  // Mapping from size class to max size storable in that class
  inline int32 class_to_size(uint32 cl) {
    return class_to_size_[cl];
  }

  // Mapping from size class to number of pages to allocate at a time
  inline size_t class_to_pages(uint32 cl) {
    return class_to_pages_[cl];
  }

  // Number of objects to move between a per-thread list and a central
  // list in one shot.  We want this to be not too small so we can
  // amortize the lock overhead for accessing the central list.  Making
  // it too big may temporarily cause unnecessary memory wastage in the
  // per-thread free list until the scavenger cleans up the list.
  inline int num_objects_to_move(uint32 cl) {
    return num_objects_to_move_[cl];
  }
};
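
// Illustrative sketch of how a caller might use SizeMap on the allocation
// fast path (hypothetical code; Static::sizemap() is assumed here purely
// for illustration):
//
//   uint32 cl;
//   if (Static::sizemap()->GetSizeClass(size, &cl)) {
//     int32 alloc_size = Static::sizemap()->ByteSizeForClass(cl);
//     // ... pop an object of alloc_size bytes from the per-thread
//     // free list for class cl ...
//   } else {
//     // size > kMaxSize: fall back to the page-level allocator.
//   }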

// Allocates "bytes" worth of memory and returns it.  Increments
// metadata_system_bytes appropriately.  May return NULL if allocation
// fails.  Requires pageheap_lock is held.
void* MetaDataAlloc(size_t bytes);

// Returns the total number of bytes allocated from the system.
// Requires pageheap_lock is held.
uint64_t metadata_system_bytes();

// size/depth are made the same size as a pointer so that some generic
// code below can conveniently cast them back and forth to void*.
static const int kMaxStackDepth = 31;
struct StackTrace {
  uintptr_t size;    // Size of object
  uintptr_t depth;   // Number of PC values stored in array below
  void*     stack[kMaxStackDepth];
};
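
// Because size and depth are pointer-sized, generic code can treat a
// StackTrace as a flat array of void* (the "casting back and forth"
// mentioned above).  A hedged illustration, assuming no inter-member
// padding (all members are pointer-sized):
//
//   StackTrace t;
//   void** words = reinterpret_cast<void**>(&t);
//   // words[0] is size, words[1] is depth (both stored as uintptr_t),
//   // and words[2] ... words[2 + t.depth - 1] are the recorded PCs.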

}  // namespace tcmalloc

#endif  // TCMALLOC_COMMON_H_