// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Ken Ashcraft <opensource@google.com>

#include <config.h>
#include "thread_cache.h"
#include <errno.h>
#include <string.h>                     // for memcpy
#include <algorithm>                    // for max, min
#include "base/commandlineflags.h"      // for SpinLockHolder
#include "base/spinlock.h"              // for SpinLockHolder
#include "getenv_safe.h"                // for TCMallocGetenvSafe
#include "central_freelist.h"           // for CentralFreeListPadded
#include "maybe_threads.h"

using std::min;
using std::max;

// Note: this is initialized manually in InitModule to ensure that
// it's configured at the right time.
//
// DEFINE_int64(tcmalloc_max_total_thread_cache_bytes,
//              EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES",
//                         kDefaultOverallThreadCacheSize),
//              "Bound on the total amount of bytes allocated to "
//              "thread caches. This bound is not strict, so it is possible "
//              "for the cache to go over this bound in certain circumstances. "
//              "Maximum value of this flag is capped to 1 GB.");

namespace tcmalloc {

static bool phinited = false;

volatile size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize;
size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize;
ssize_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize;
PageHeapAllocator<ThreadCache> threadcache_allocator;
ThreadCache* ThreadCache::thread_heaps_ = NULL;
int ThreadCache::thread_heap_count_ = 0;
ThreadCache* ThreadCache::next_memory_steal_ = NULL;
#ifdef HAVE_TLS
__thread ThreadCache::ThreadLocalData ThreadCache::threadlocal_data_
    ATTR_INITIAL_EXEC CACHELINE_ALIGNED;
#endif
bool ThreadCache::tsd_inited_ = false;
pthread_key_t ThreadCache::heap_key_;

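// Set up a freshly created thread cache: claim an initial max_size_ from the
// unclaimed pool (falling back to kMinThreadCacheSize), initialize one free
// list per size class, and seed the sampler from the thread id.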
void ThreadCache::Init(pthread_t tid) {
  size_ = 0;

  max_size_ = 0;
  IncreaseCacheLimitLocked();
  if (max_size_ == 0) {
    // There isn't enough memory to go around.  Just give the minimum to
    // this thread.
    SetMaxSize(kMinThreadCacheSize);

    // This may drive unclaimed_cache_space_ negative.
    unclaimed_cache_space_ -= kMinThreadCacheSize;
    ASSERT(unclaimed_cache_space_ < 0);
  }

  next_ = NULL;
  prev_ = NULL;
  tid_  = tid;
  in_setspecific_ = false;
  for (uint32 cl = 0; cl < Static::num_size_classes(); ++cl) {
    list_[cl].Init(Static::sizemap()->class_to_size(cl));
  }

  uint32_t sampler_seed;
  memcpy(&sampler_seed, &tid, sizeof(sampler_seed));
  sampler_.Init(sampler_seed);
}

void ThreadCache::Cleanup() {
  // Put unused memory back into central cache
  for (uint32 cl = 0; cl < Static::num_size_classes(); ++cl) {
    if (list_[cl].length() > 0) {
      ReleaseToCentralCache(&list_[cl], cl, list_[cl].length());
    }
  }
}

// Remove some objects of class "cl" from central cache and add to thread heap.
// On success, return the first object for immediate use; on failure, hand off
// to oom_handler.
void* ThreadCache::FetchFromCentralCache(uint32 cl, int32_t byte_size,
                                         void *(*oom_handler)(size_t size)) {
  FreeList* list = &list_[cl];
  ASSERT(list->empty());
  const int batch_size = Static::sizemap()->num_objects_to_move(cl);

  const int num_to_move = min<int>(list->max_length(), batch_size);
  void *start, *end;
  int fetch_count = Static::central_cache()[cl].RemoveRange(
      &start, &end, num_to_move);

  if (fetch_count == 0) {
    ASSERT(start == NULL);
    return oom_handler(byte_size);
  }
  ASSERT(start != NULL);

  if (--fetch_count >= 0) {
    size_ += byte_size * fetch_count;
    list->PushRange(fetch_count, SLL_Next(start), end);
  }

  // Increase max length slowly up to batch_size.  After that,
  // increase by batch_size in one shot so that the length is a
  // multiple of batch_size.
  if (list->max_length() < batch_size) {
    list->set_max_length(list->max_length() + 1);
  } else {
    // Don't let the list get too long.  In 32 bit builds, the length
    // is represented by a 16 bit int, so we need to watch out for
    // integer overflow.
    int new_length = min<int>(list->max_length() + batch_size,
                              kMaxDynamicFreeListLength);
    // The list's max_length must always be a multiple of batch_size,
    // and kMaxDynamicFreeListLength is not necessarily a multiple
    // of batch_size.
    new_length -= new_length % batch_size;
    ASSERT(new_length % batch_size == 0);
    list->set_max_length(new_length);
  }
  return start;
}

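// Called when a free list has grown past its max_length: account for the
// newly freed object in size_, return one batch to the central cache, nudge
// max_length toward batch_size, and scavenge if this cache is over max_size_.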
void ThreadCache::ListTooLong(FreeList* list, uint32 cl) {
  size_ += list->object_size();

  const int batch_size = Static::sizemap()->num_objects_to_move(cl);
  ReleaseToCentralCache(list, cl, batch_size);

  // If the list is too long, we need to transfer some number of
  // objects to the central cache.  Ideally, we would transfer
  // num_objects_to_move, so the code below tries to make max_length
  // converge on num_objects_to_move.

  if (list->max_length() < batch_size) {
    // Slow start the max_length so we don't overreserve.
    list->set_max_length(list->max_length() + 1);
  } else if (list->max_length() > batch_size) {
    // If we consistently go over max_length, shrink max_length.  If we don't
    // shrink it, some amount of memory will always stay in this freelist.
    list->set_length_overages(list->length_overages() + 1);
    if (list->length_overages() > kMaxOverages) {
      ASSERT(list->max_length() > batch_size);
      list->set_max_length(list->max_length() - batch_size);
      list->set_length_overages(0);
    }
  }

  if (PREDICT_FALSE(size_ > max_size_)) {
    Scavenge();
  }
}

// Remove some objects of class "cl" from thread heap and add to central cache
void ThreadCache::ReleaseToCentralCache(FreeList* src, uint32 cl, int N) {
  ASSERT(src == &list_[cl]);
  if (N > src->length()) N = src->length();
  size_t delta_bytes = N * Static::sizemap()->ByteSizeForClass(cl);

  // We return prepackaged chains of the correct size to the central cache.
  // TODO: Use the same format internally in the thread caches?
  int batch_size = Static::sizemap()->num_objects_to_move(cl);
  while (N > batch_size) {
    void *tail, *head;
    src->PopRange(batch_size, &head, &tail);
    Static::central_cache()[cl].InsertRange(head, tail, batch_size);
    N -= batch_size;
  }
  void *tail, *head;
  src->PopRange(N, &head, &tail);
  Static::central_cache()[cl].InsertRange(head, tail, N);
  size_ -= delta_bytes;
}

// Release idle memory to the central cache
void ThreadCache::Scavenge() {
  // If the low-water mark for the free list is L, it means we would
  // not have had to allocate anything from the central cache even if
  // we had reduced the free list size by L.  We aim to get closer to
  // that situation by dropping L/2 nodes from the free list.  This
  // may not release much memory, but if so we will call scavenge again
  // pretty soon and the low-water marks will be high on that call.
  for (int cl = 0; cl < Static::num_size_classes(); cl++) {
    FreeList* list = &list_[cl];
    const int lowmark = list->lowwatermark();
    if (lowmark > 0) {
      const int drop = (lowmark > 1) ? lowmark/2 : 1;
      ReleaseToCentralCache(list, cl, drop);

      // Shrink the max length if it isn't used.  Only shrink down to
      // batch_size -- if the thread was active enough to get the max_length
      // above batch_size, it will likely be that active again.  If
      // max_length shrinks below batch_size, the thread will have to
      // go through the slow-start behavior again.  The slow-start is useful
      // mainly for threads that stay relatively idle for their entire
      // lifetime.
      const int batch_size = Static::sizemap()->num_objects_to_move(cl);
      if (list->max_length() > batch_size) {
        list->set_max_length(
            max<int>(list->max_length() - batch_size, batch_size));
      }
    }
    list->clear_lowwatermark();
  }

  IncreaseCacheLimit();
}

void ThreadCache::IncreaseCacheLimit() {
  SpinLockHolder h(Static::pageheap_lock());
  IncreaseCacheLimitLocked();
}

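// Try to grow this thread's max_size_ by kStealAmount, either by claiming
// unclaimed cache space or by stealing budget from another thread's cache.
// Requires pageheap_lock to be held.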
void ThreadCache::IncreaseCacheLimitLocked() {
  if (unclaimed_cache_space_ > 0) {
    // Possibly make unclaimed_cache_space_ negative.
    unclaimed_cache_space_ -= kStealAmount;
    SetMaxSize(max_size_ + kStealAmount);
    return;
  }
  // Don't hold pageheap_lock too long.  Try to steal from 10 other
  // threads before giving up.  The i < 10 condition also prevents an
  // infinite loop in case none of the existing thread heaps are
  // suitable places to steal from.
  for (int i = 0; i < 10;
       ++i, next_memory_steal_ = next_memory_steal_->next_) {
    // Reached the end of the linked list.  Start at the beginning.
    if (next_memory_steal_ == NULL) {
      ASSERT(thread_heaps_ != NULL);
      next_memory_steal_ = thread_heaps_;
    }
    if (next_memory_steal_ == this ||
        next_memory_steal_->max_size_ <= kMinThreadCacheSize) {
      continue;
    }
    next_memory_steal_->SetMaxSize(next_memory_steal_->max_size_ - kStealAmount);
    SetMaxSize(max_size_ + kStealAmount);

    next_memory_steal_ = next_memory_steal_->next_;
    return;
  }
}

int ThreadCache::GetSamplePeriod() {
  return Sampler::GetSamplePeriod();
}

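// One-time module initialization: pick up the thread cache size limit from
// the environment, initialize the static metadata and the thread cache
// allocator, then run the part of initialization that may recurse into
// malloc without holding pageheap_lock.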
void ThreadCache::InitModule() {
  {
    SpinLockHolder h(Static::pageheap_lock());
    if (phinited) {
      return;
    }
    const char *tcb = TCMallocGetenvSafe("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES");
    if (tcb) {
      set_overall_thread_cache_size(strtoll(tcb, NULL, 10));
    }
    Static::InitStaticVars();
    threadcache_allocator.Init();
    phinited = true;
  }

  // We do the "late" part of initialization without holding the lock since
  // there is a chance it will recurse into malloc.
  Static::InitLateMaybeRecursive();
}

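// Register the pthread TSD key whose destructor (DestroyThreadCache) reclaims
// a thread's cache when that thread exits.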
void ThreadCache::InitTSD() {
  ASSERT(!tsd_inited_);
  perftools_pthread_key_create(&heap_key_, DestroyThreadCache);
  tsd_inited_ = true;

#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY
  // We may have used a fake pthread_t for the main thread.  Fix it.
  pthread_t zero;
  memset(&zero, 0, sizeof(zero));
  SpinLockHolder h(Static::pageheap_lock());
  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
    if (h->tid_ == zero) {
      h->tid_ = pthread_self();
    }
  }
#endif
}

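// Find or create the calling thread's cache, running InitModule() first if
// thread-specific data has not been set up yet.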
ThreadCache* ThreadCache::CreateCacheIfNecessary() {
  if (!tsd_inited_) {
#ifndef NDEBUG
    // Test that freeing NULL very early works.
    free(NULL);
#endif

    InitModule();
  }

  // Initialize per-thread data if necessary
  ThreadCache* heap = NULL;

  bool search_condition = true;
#ifdef HAVE_TLS
  static __thread ThreadCache** current_heap_ptr ATTR_INITIAL_EXEC;
  if (tsd_inited_) {
    // In the most common case we avoid the expensive linear search
    // through all heaps (see below).  Working TLS also gives us faster
    // protection against malloc recursion in pthread_setspecific.
    search_condition = false;

    if (current_heap_ptr != NULL) {
      // We're being recursively called by pthread_setspecific below.
      return *current_heap_ptr;
    }
    current_heap_ptr = &heap;
  }
#endif

  {
    SpinLockHolder h(Static::pageheap_lock());
    // On some old glibc's, and on freebsd's libc (as of freebsd 8.1),
    // calling pthread routines (even pthread_self) too early could
    // cause a segfault.  Since we can call pthreads quite early, we
    // have to protect against that in such situations by making a
    // 'fake' pthread.  This is not ideal since it doesn't work well
    // when linking tcmalloc statically with apps that create threads
    // before main, so we only do it if we have to.
#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY
    pthread_t me;
    if (!tsd_inited_) {
      memset(&me, 0, sizeof(me));
    } else {
      me = pthread_self();
    }
#else
    const pthread_t me = pthread_self();
#endif

    // This may be a recursive malloc call from pthread_setspecific()
    // In that case, the heap for this thread has already been created
    // and added to the linked list.  So we search for that first.
    if (search_condition) {
      for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
        if (h->tid_ == me) {
          heap = h;
          break;
        }
      }
    }

    if (heap == NULL) heap = NewHeap(me);
  }

  // We call pthread_setspecific() outside the lock because it may
  // call malloc() recursively.  We check for the recursive call using
  // the "in_setspecific_" flag so that we can avoid calling
  // pthread_setspecific() if we are already inside pthread_setspecific().
  if (!heap->in_setspecific_ && tsd_inited_) {
    heap->in_setspecific_ = true;
    perftools_pthread_setspecific(heap_key_, heap);
#ifdef HAVE_TLS
    // Also keep a copy in __thread for faster retrieval
    threadlocal_data_.heap = heap;
    threadlocal_data_.fast_path_heap = heap;
#endif
    heap->in_setspecific_ = false;
  }
#ifdef HAVE_TLS
  current_heap_ptr = NULL;
#endif
  return heap;
}

ThreadCache* ThreadCache::NewHeap(pthread_t tid) {
  // Create the heap and add it to the linked list
  ThreadCache *heap = threadcache_allocator.New();
  heap->Init(tid);
  heap->next_ = thread_heaps_;
  heap->prev_ = NULL;
  if (thread_heaps_ != NULL) {
    thread_heaps_->prev_ = heap;
  } else {
    // This is the only thread heap at the moment.
    ASSERT(next_memory_steal_ == NULL);
    next_memory_steal_ = heap;
  }
  thread_heaps_ = heap;
  thread_heap_count_++;
  return heap;
}

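// Mark the calling thread as idle: detach its cache from thread-specific
// data and delete the cache, returning its memory to the central free lists.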
void ThreadCache::BecomeIdle() {
  if (!tsd_inited_) return;              // No caches yet
  ThreadCache* heap = GetThreadHeap();
  if (heap == NULL) return;              // No thread cache to remove
  if (heap->in_setspecific_) return;     // Do not disturb the active caller

  heap->in_setspecific_ = true;
  perftools_pthread_setspecific(heap_key_, NULL);
#ifdef HAVE_TLS
  // Also update the copy in __thread
  threadlocal_data_.heap = NULL;
  threadlocal_data_.fast_path_heap = NULL;
#endif
  heap->in_setspecific_ = false;
  if (GetThreadHeap() == heap) {
    // Somehow heap got reinstated by a recursive call to malloc
    // from pthread_setspecific.  We give up in this case.
    return;
  }

  // We can now get rid of the heap
  DeleteCache(heap);
}

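// Like BecomeIdle(), but keep the thread cache registered: only flush its
// free lists back to the central cache.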
void ThreadCache::BecomeTemporarilyIdle() {
  ThreadCache* heap = GetCacheIfPresent();
  if (heap)
    heap->Cleanup();
}

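// TSD destructor registered in InitTSD(); runs at thread exit to tear down
// the exiting thread's cache.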
void ThreadCache::DestroyThreadCache(void* ptr) {
  // Note that "ptr" cannot be NULL since pthread promises not
  // to invoke the destructor on NULL values, but for safety,
  // we check anyway.
  if (ptr == NULL) return;
#ifdef HAVE_TLS
  // Prevent fast path of GetThreadHeap() from returning heap.
  threadlocal_data_.heap = NULL;
  threadlocal_data_.fast_path_heap = NULL;
#endif
  DeleteCache(reinterpret_cast<ThreadCache*>(ptr));
}

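// Release all of heap's memory to the central cache, unlink it from the
// global list of thread caches, return its budget to the unclaimed pool,
// and free the ThreadCache object itself.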
void ThreadCache::DeleteCache(ThreadCache* heap) {
  // Remove all memory from heap
  heap->Cleanup();

  // Remove from linked list
  SpinLockHolder h(Static::pageheap_lock());
  if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_;
  if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_;
  if (thread_heaps_ == heap) thread_heaps_ = heap->next_;
  thread_heap_count_--;

  if (next_memory_steal_ == heap) next_memory_steal_ = heap->next_;
  if (next_memory_steal_ == NULL) next_memory_steal_ = thread_heaps_;
  unclaimed_cache_space_ += heap->max_size_;

  threadcache_allocator.Delete(heap);
}

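// Recompute the per-thread budget from overall_thread_cache_size_ and the
// current number of thread caches, shrinking existing caches proportionally
// if the budget went down.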
void ThreadCache::RecomputePerThreadCacheSize() {
  // Divide available space across threads
  int n = thread_heap_count_ > 0 ? thread_heap_count_ : 1;
  size_t space = overall_thread_cache_size_ / n;

  // Limit to allowed range
  if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
  if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;

  double ratio = space / max<double>(1, per_thread_cache_size_);
  size_t claimed = 0;
  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
    // Increasing the total cache size should not circumvent the
    // slow-start growth of max_size_.
    if (ratio < 1.0) {
      h->SetMaxSize(h->max_size_ * ratio);
    }
    claimed += h->max_size_;
  }
  unclaimed_cache_space_ = overall_thread_cache_size_ - claimed;
  per_thread_cache_size_ = space;
}

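// Accumulate the total cached bytes (and, if class_count is non-NULL, the
// per-size-class free list lengths) across all thread caches.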
void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) {
  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
    *total_bytes += h->Size();
    if (class_count) {
      for (int cl = 0; cl < Static::num_size_classes(); ++cl) {
        class_count[cl] += h->freelist_length(cl);
      }
    }
  }
}

void ThreadCache::set_overall_thread_cache_size(size_t new_size) {
  // Clip the value to a reasonable range
  if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize;
  if (new_size > (1<<30)) new_size = (1<<30);     // Limit to 1GB
  overall_thread_cache_size_ = new_size;

  RecomputePerThreadCacheSize();
}

}  // namespace tcmalloc