// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat <opensource@google.com>

#include <stdlib.h>       // for getenv and strtol
#include "config.h"
#include "common.h"
#include "system-alloc.h"
#include "base/spinlock.h"
#include "getenv_safe.h"  // TCMallocGetenvSafe

namespace tcmalloc {
// Defines the maximum number of objects per size class to transfer between
// the thread caches and the central cache.
static int32 FLAGS_tcmalloc_transfer_num_objects;

static const int32 kDefaultTransferNumObjects = 32768;

// This init function explicitly initializes the variable from the
// environment variable, so as to avoid a C++ dynamic initializer, which
// could otherwise run only after the first malloc/new call.
static inline void InitTCMallocTransferNumObjects() {
  if (UNLIKELY(FLAGS_tcmalloc_transfer_num_objects == 0)) {
    const char *envval = TCMallocGetenvSafe("TCMALLOC_TRANSFER_NUM_OBJ");
    FLAGS_tcmalloc_transfer_num_objects =
        !envval ? kDefaultTransferNumObjects : strtol(envval, NULL, 10);
  }
}
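
// A minimal standalone sketch of the same lazy-init pattern (an editor's
// illustration with hypothetical names, not part of this allocator; it also
// uses plain getenv rather than the TCMallocGetenvSafe wrapper):
//
//   #include <stdlib.h>
//   static long cached_limit;  // zero-initialized at load time; no ctor runs
//   static long GetLimit() {
//     if (cached_limit == 0) {  // first call: consult the environment once
//       const char *v = getenv("MY_LIMIT");  // hypothetical variable name
//       cached_limit = v ? strtol(v, NULL, 10) : 42;  // assumed default
//     }
//     return cached_limit;
//   }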

// Note: the following only works for "n"s that fit in 32 bits, but
// that is fine since we only use it for small sizes.
static inline int LgFloor(size_t n) {
  int log = 0;
  for (int i = 4; i >= 0; --i) {
    int shift = (1 << i);
    size_t x = n >> shift;
    if (x != 0) {
      n = x;
      log += shift;
    }
  }
  ASSERT(n == 1);
  return log;
}
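
// Worked example (editor's illustration): LgFloor(320).
//   i=4: 320 >> 16 == 0, skip.
//   i=3: 320 >> 8 == 1, so n becomes 1 and log becomes 8.
//   i=2,1,0: 1 >> 4, 1 >> 2, 1 >> 1 are all 0, skip.
// The result is 8, and indeed 2^8 = 256 <= 320 < 512 = 2^9.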

int AlignmentForSize(size_t size) {
  int alignment = kAlignment;
  if (size > kMaxSize) {
    // Cap alignment at kPageSize for large sizes.
    alignment = kPageSize;
  } else if (size >= 128) {
    // Space wasted due to alignment is at most 1/8, i.e., 12.5%.
    alignment = (1 << LgFloor(size)) / 8;
  } else if (size >= kMinAlign) {
    // We need an alignment of at least 16 bytes to satisfy
    // requirements for some SSE types.
    alignment = kMinAlign;
  }
  // The maximum alignment we allow is the page size.
  if (alignment > kPageSize) {
    alignment = kPageSize;
  }
  CHECK_CONDITION(size < kMinAlign || alignment >= kMinAlign);
  CHECK_CONDITION((alignment & (alignment - 1)) == 0);
  return alignment;
}
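
// Worked example (editor's illustration): AlignmentForSize(320).
//   320 is <= kMaxSize and >= 128, so alignment = (1 << LgFloor(320)) / 8
//   = 256 / 8 = 32.  Rounding any size in [256, 512) up to a multiple of 32
//   wastes at most 31 bytes, which is less than 1/8 of the object size.
//   Both CHECK_CONDITIONs hold: 32 >= kMinAlign (16 by default) and 32 is
//   a power of two.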

int SizeMap::NumMoveSize(size_t size) {
  if (size == 0) return 0;
  // Use transfer batches of approximately 64KB between the thread and
  // central caches.
  int num = static_cast<int>(64.0 * 1024.0 / size);
  if (num < 2) num = 2;

  // Avoid bringing too many objects into small object free lists.
  // If this value is too large:
  // - We waste memory with extra objects sitting in the thread caches.
  // - The central freelist holds its lock for too long while
  //   building a linked list of objects, slowing down the allocations
  //   of other threads.
  // If this value is too small:
  // - We go to the central freelist too often and we have to acquire
  //   its lock each time.
  // This value strikes a balance between the constraints above.
  if (num > FLAGS_tcmalloc_transfer_num_objects)
    num = FLAGS_tcmalloc_transfer_num_objects;

  return num;
}
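
// Worked examples (editor's illustration, assuming the default cap of 32768):
//   size = 256:     num = 65536 / 256 = 256; within [2, 32768], so 256.
//   size = 8:       num = 65536 / 8 = 8192; below the cap, so 8192.
//   size = 131072:  num truncates to 0; raised to the minimum of 2.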

// Initialize the size-class mappings and the transfer-batch sizes.
void SizeMap::Init() {
  InitTCMallocTransferNumObjects();

  // Do some sanity checking on ClassIndex()/class_array_.
  if (ClassIndex(0) != 0) {
    Log(kCrash, __FILE__, __LINE__,
        "Invalid class index for size 0", ClassIndex(0));
  }
  if (ClassIndex(kMaxSize) >= sizeof(class_array_)) {
    Log(kCrash, __FILE__, __LINE__,
        "Invalid class index for kMaxSize", ClassIndex(kMaxSize));
  }

  // Compute the size classes we want to use
  int sc = 1;   // Next size class to assign
  int alignment = kAlignment;
  CHECK_CONDITION(kAlignment <= kMinAlign);
  for (size_t size = kAlignment; size <= kMaxSize; size += alignment) {
    alignment = AlignmentForSize(size);
    CHECK_CONDITION((size % alignment) == 0);

    int blocks_to_move = NumMoveSize(size) / 4;
    size_t psize = 0;
    do {
      psize += kPageSize;
      // Allocate enough pages so leftover is less than 1/8 of total.
      // This bounds wasted space to at most 12.5%.
      while ((psize % size) > (psize >> 3)) {
        psize += kPageSize;
      }
      // Continue to add pages until there are at least as many objects in
      // the span as are needed when moving objects from the central
      // freelists and spans to the thread caches.
    } while ((psize / size) < blocks_to_move);
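
    // Worked example for the loop above (editor's illustration, assuming
    // the default 8KB pages and a hypothetical size of 1152 bytes):
    // NumMoveSize(1152) = 56, so blocks_to_move = 14.  First pass:
    // psize = 8192; 8192 % 1152 = 128 <= 1024 (psize >> 3), so the waste
    // bound holds, but 8192 / 1152 = 7 objects < 14, so add another page.
    // Second pass: psize = 16384; 16384 % 1152 = 256 <= 2048, and
    // 16384 / 1152 = 14 objects, so the loop stops with a 2-page span.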
    const size_t my_pages = psize >> kPageShift;

    if (sc > 1 && my_pages == class_to_pages_[sc-1]) {
      // See if we can merge this into the previous class without
      // increasing the fragmentation of the previous class.
      const size_t my_objects = (my_pages << kPageShift) / size;
      const size_t prev_objects = (class_to_pages_[sc-1] << kPageShift)
                                  / class_to_size_[sc-1];
      if (my_objects == prev_objects) {
        // Adjust last class to include this size
        class_to_size_[sc-1] = size;
        continue;
      }
    }

    // Add new class
    class_to_pages_[sc] = my_pages;
    class_to_size_[sc] = size;
    sc++;
  }
  if (sc != kNumClasses) {
    Log(kCrash, __FILE__, __LINE__,
        "wrong number of size classes: (found vs. expected)", sc, kNumClasses);
  }

  // Initialize the mapping arrays
  int next_size = 0;
  for (int c = 1; c < kNumClasses; c++) {
    const int max_size_in_class = class_to_size_[c];
    for (int s = next_size; s <= max_size_in_class; s += kAlignment) {
      class_array_[ClassIndex(s)] = c;
    }
    next_size = max_size_in_class + kAlignment;
  }

  // Double-check sizes just to be safe
  for (size_t size = 0; size <= kMaxSize;) {
    const int sc = SizeClass(size);
    if (sc <= 0 || sc >= kNumClasses) {
      Log(kCrash, __FILE__, __LINE__,
          "Bad size class (class, size)", sc, size);
    }
    if (sc > 1 && size <= class_to_size_[sc-1]) {
      Log(kCrash, __FILE__, __LINE__,
          "Allocating unnecessarily large class (class, size)", sc, size);
    }
    const size_t s = class_to_size_[sc];
    if (size > s || s == 0) {
      Log(kCrash, __FILE__, __LINE__,
          "Bad (class, size, requested)", sc, s, size);
    }
    // Step by the granularity of the class_array_ buckets: 8 bytes for
    // small sizes and 128 bytes beyond kMaxSmallSize.
    if (size <= kMaxSmallSize) {
      size += 8;
    } else {
      size += 128;
    }
  }

  // Initialize the num_objects_to_move array.
  for (size_t cl = 1; cl < kNumClasses; ++cl) {
    num_objects_to_move_[cl] = NumMoveSize(ByteSizeForClass(cl));
  }
}
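
// A minimal sketch of the lookup pattern Init() prepares (an editor's
// illustration with a hypothetical helper; the real ClassIndex()/SizeClass()
// accessors live in common.h):
//
//   // Two-level mapping: a request size indexes a dense byte array that
//   // names the size class, and the class then yields the rounded size.
//   size_t RoundedSizeDemo(SizeMap &m, size_t size) {  // hypothetical
//     int cl = m.SizeClass(size);        // class_array_[ClassIndex(size)]
//     return m.ByteSizeForClass(cl);     // class_to_size_[cl]
//   }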

// Metadata allocator -- keeps stats about how many bytes are allocated.
static uint64_t metadata_system_bytes_ = 0;
static const size_t kMetadataAllocChunkSize = 8*1024*1024;
static const size_t kMetadataBigAllocThreshold = kMetadataAllocChunkSize / 8;
// malloc usually uses larger alignments, but because metadata never holds
// any fancy SIMD types, aligning on the pointer size is fine.
static const size_t kMetadataAlignment = sizeof(void *);

static char *metadata_chunk_alloc_;
static size_t metadata_chunk_avail_;

static SpinLock metadata_alloc_lock(SpinLock::LINKER_INITIALIZED);

void* MetaDataAlloc(size_t bytes) {
  if (bytes >= kMetadataAllocChunkSize) {
    void *rv = TCMalloc_SystemAlloc(bytes,
                                    NULL, kMetadataAlignment);
    if (rv != NULL) {
      metadata_system_bytes_ += bytes;
    }
    return rv;
  }

  SpinLockHolder h(&metadata_alloc_lock);

  // The following works by treating the address as an integer modulo
  // kMetadataAlignment and negating it: the negated value plus the original
  // value is 0 modulo kMetadataAlignment, which is exactly the padding we
  // want.  Note that we negate before masking the higher bits off;
  // otherwise we would have to mask them off again after the negation
  // anyway.
  intptr_t alignment =
      -reinterpret_cast<intptr_t>(metadata_chunk_alloc_) &
      (kMetadataAlignment - 1);
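
  // Worked example (editor's illustration, on a 64-bit platform where
  // kMetadataAlignment is 8): if metadata_chunk_alloc_ ends in ...0x5,
  // then -5 & 7 == 3, and 0x5 + 3 == 0x8, which is 8-byte aligned.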

  if (metadata_chunk_avail_ < bytes + alignment) {
    size_t real_size;
    void *ptr = TCMalloc_SystemAlloc(kMetadataAllocChunkSize,
                                     &real_size, kMetadataAlignment);
    if (ptr == NULL) {
      return NULL;
    }

    metadata_chunk_alloc_ = static_cast<char *>(ptr);
    metadata_chunk_avail_ = real_size;

    alignment = 0;
  }

  void *rv = static_cast<void *>(metadata_chunk_alloc_ + alignment);
  bytes += alignment;
  metadata_chunk_alloc_ += bytes;
  metadata_chunk_avail_ -= bytes;
  metadata_system_bytes_ += bytes;
  return rv;
}
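
// A minimal standalone sketch of the same chunked bump-allocation scheme
// (an editor's illustration with hypothetical names; the real code above
// additionally takes a spinlock, handles big allocations separately, and
// tracks usage stats):
//
//   #include <stddef.h>
//   #include <stdint.h>
//   static char *chunk;    // cursor into the current chunk
//   static size_t avail;   // bytes left in the current chunk
//   void* BumpAlloc(size_t bytes) {
//     // Pad the cursor up to the next pointer-size boundary.
//     size_t pad = -reinterpret_cast<intptr_t>(chunk) & (sizeof(void*) - 1);
//     if (avail < bytes + pad) {
//       chunk = static_cast<char*>(GetNewChunkFromSystem());  // hypothetical
//       avail = kChunkSize;                                   // hypothetical
//       pad = 0;  // fresh chunks are suitably aligned already
//     }
//     void *rv = chunk + pad;
//     chunk += bytes + pad;
//     avail -= bytes + pad;
//     return rv;
//   }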

uint64_t metadata_system_bytes() { return metadata_system_bytes_; }

}  // namespace tcmalloc