// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Ken Ashcraft <opensource@google.com>

#include <config.h>
#include "thread_cache.h"
#include <errno.h>
#include <string.h>                     // for memcpy
#include <algorithm>                    // for max, min
#include "base/commandlineflags.h"      // for SpinLockHolder
#include "base/spinlock.h"              // for SpinLockHolder
#include "getenv_safe.h"                // for TCMallocGetenvSafe
#include "central_freelist.h"           // for CentralFreeListPadded
#include "maybe_threads.h"

using std::min;
using std::max;

// Note: this is initialized manually in InitModule to ensure that
// it's configured at the right time.
//
// DEFINE_int64(tcmalloc_max_total_thread_cache_bytes,
//              EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES",
//                         kDefaultOverallThreadCacheSize),
//              "Bound on the total amount of bytes allocated to "
//              "thread caches. This bound is not strict, so it is possible "
//              "for the cache to go over this bound in certain circumstances. "
//              "Maximum value of this flag is capped to 1 GB.");


namespace tcmalloc {

static bool phinited = false;

volatile size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize;
size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize;
ssize_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize;
PageHeapAllocator<ThreadCache> threadcache_allocator;
ThreadCache* ThreadCache::thread_heaps_ = NULL;
int ThreadCache::thread_heap_count_ = 0;
ThreadCache* ThreadCache::next_memory_steal_ = NULL;
#ifdef HAVE_TLS
__thread ThreadCache::ThreadLocalData ThreadCache::threadlocal_data_
    ATTR_INITIAL_EXEC
    = {0, 0};
#endif
bool ThreadCache::tsd_inited_ = false;
pthread_key_t ThreadCache::heap_key_;

void ThreadCache::Init(pthread_t tid) {
  size_ = 0;

  max_size_ = 0;
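  // Claim an initial cache budget: IncreaseCacheLimitLocked() takes space from
  // the unclaimed pool, or steals some from another thread's cache if the pool
  // is exhausted.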
  IncreaseCacheLimitLocked();
  if (max_size_ == 0) {
    // There isn't enough memory to go around.  Just give the minimum to
    // this thread.
    max_size_ = kMinThreadCacheSize;

    // Take unclaimed_cache_space_ negative.
    unclaimed_cache_space_ -= kMinThreadCacheSize;
    ASSERT(unclaimed_cache_space_ < 0);
  }

  next_ = NULL;
  prev_ = NULL;
  tid_ = tid;
  in_setspecific_ = false;
  for (size_t cl = 0; cl < kNumClasses; ++cl) {
    list_[cl].Init();
  }

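  // Seed the per-thread sampler with bits taken from the thread id.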
  uint32_t sampler_seed;
  memcpy(&sampler_seed, &tid, sizeof(sampler_seed));
  sampler_.Init(sampler_seed);
}

void ThreadCache::Cleanup() {
  // Put unused memory back into central cache
  for (int cl = 0; cl < kNumClasses; ++cl) {
    if (list_[cl].length() > 0) {
      ReleaseToCentralCache(&list_[cl], cl, list_[cl].length());
    }
  }
}

// Remove some objects of class "cl" from central cache and add to thread heap.
// On success, return the first object for immediate use; otherwise return NULL.
void* ThreadCache::FetchFromCentralCache(size_t cl, size_t byte_size) {
  FreeList* list = &list_[cl];
  ASSERT(list->empty());
  const int batch_size = Static::sizemap()->num_objects_to_move(cl);

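  // Fetch up to one batch, but no more than the list's current max_length allows.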
  const int num_to_move = min<int>(list->max_length(), batch_size);
  void *start, *end;
  int fetch_count = Static::central_cache()[cl].RemoveRange(
      &start, &end, num_to_move);

  ASSERT((start == NULL) == (fetch_count == 0));
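  // Hand the first fetched object back to the caller; the remaining
  // fetch_count objects are pushed onto the free list.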
  if (--fetch_count >= 0) {
    size_ += byte_size * fetch_count;
    list->PushRange(fetch_count, SLL_Next(start), end);
  }

  // Increase max length slowly up to batch_size.  After that,
  // increase by batch_size in one shot so that the length is a
  // multiple of batch_size.
  if (list->max_length() < batch_size) {
    list->set_max_length(list->max_length() + 1);
  } else {
    // Don't let the list get too long.  In 32 bit builds, the length
    // is represented by a 16 bit int, so we need to watch out for
    // integer overflow.
    int new_length = min<int>(list->max_length() + batch_size,
                              kMaxDynamicFreeListLength);
    // The list's max_length must always be a multiple of batch_size,
    // and kMaxDynamicFreeListLength is not necessarily a multiple
    // of batch_size.
    new_length -= new_length % batch_size;
    ASSERT(new_length % batch_size == 0);
    list->set_max_length(new_length);
  }
  return start;
}

void ThreadCache::ListTooLong(FreeList* list, size_t cl) {
  const int batch_size = Static::sizemap()->num_objects_to_move(cl);
  ReleaseToCentralCache(list, cl, batch_size);

  // If the list is too long, we need to transfer some number of
  // objects to the central cache.  Ideally, we would transfer
  // num_objects_to_move, so the code below tries to make max_length
  // converge on num_objects_to_move.

  if (list->max_length() < batch_size) {
    // Slow start the max_length so we don't overreserve.
    list->set_max_length(list->max_length() + 1);
  } else if (list->max_length() > batch_size) {
    // If we consistently go over max_length, shrink max_length.  If we don't
    // shrink it, some amount of memory will always stay in this freelist.
    list->set_length_overages(list->length_overages() + 1);
    if (list->length_overages() > kMaxOverages) {
      ASSERT(list->max_length() > batch_size);
      list->set_max_length(list->max_length() - batch_size);
      list->set_length_overages(0);
    }
  }
}

// Remove some objects of class "cl" from thread heap and add to central cache
void ThreadCache::ReleaseToCentralCache(FreeList* src, size_t cl, int N) {
  ASSERT(src == &list_[cl]);
  if (N > src->length()) N = src->length();
  size_t delta_bytes = N * Static::sizemap()->ByteSizeForClass(cl);

  // We return prepackaged chains of the correct size to the central cache.
  // TODO: Use the same format internally in the thread caches?
  int batch_size = Static::sizemap()->num_objects_to_move(cl);
  while (N > batch_size) {
    void *tail, *head;
    src->PopRange(batch_size, &head, &tail);
    Static::central_cache()[cl].InsertRange(head, tail, batch_size);
    N -= batch_size;
  }
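  // Release whatever remains (at most batch_size objects) as one final range.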
  void *tail, *head;
  src->PopRange(N, &head, &tail);
  Static::central_cache()[cl].InsertRange(head, tail, N);
  size_ -= delta_bytes;
}

// Release idle memory to the central cache
void ThreadCache::Scavenge() {
  // If the low-water mark for the free list is L, it means we would
  // not have had to allocate anything from the central cache even if
  // we had reduced the free list size by L.  We aim to get closer to
  // that situation by dropping L/2 nodes from the free list.  This
  // may not release much memory, but if so we will call scavenge again
  // pretty soon and the low-water marks will be high on that call.
  //int64 start = CycleClock::Now();
  for (int cl = 0; cl < kNumClasses; cl++) {
    FreeList* list = &list_[cl];
    const int lowmark = list->lowwatermark();
    if (lowmark > 0) {
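      // Drop about half of the low-water mark, but always at least one object.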
      const int drop = (lowmark > 1) ? lowmark/2 : 1;
      ReleaseToCentralCache(list, cl, drop);

      // Shrink the max length if it isn't used.  Only shrink down to
      // batch_size -- if the thread was active enough to get the max_length
      // above batch_size, it will likely be that active again.  If
      // max_length shrinks below batch_size, the thread will have to
      // go through the slow-start behavior again.  The slow-start is useful
      // mainly for threads that stay relatively idle for their entire
      // lifetime.
      const int batch_size = Static::sizemap()->num_objects_to_move(cl);
      if (list->max_length() > batch_size) {
        list->set_max_length(
            max<int>(list->max_length() - batch_size, batch_size));
      }
    }
    list->clear_lowwatermark();
  }

  IncreaseCacheLimit();
}

void ThreadCache::IncreaseCacheLimit() {
  SpinLockHolder h(Static::pageheap_lock());
  IncreaseCacheLimitLocked();
}

void ThreadCache::IncreaseCacheLimitLocked() {
  if (unclaimed_cache_space_ > 0) {
    // Possibly make unclaimed_cache_space_ negative.
    unclaimed_cache_space_ -= kStealAmount;
    max_size_ += kStealAmount;
    return;
  }
  // Don't hold pageheap_lock too long.  Try to steal from 10 other
  // threads before giving up.  The i < 10 condition also prevents an
  // infinite loop in case none of the existing thread heaps are
  // suitable places to steal from.
  for (int i = 0; i < 10;
       ++i, next_memory_steal_ = next_memory_steal_->next_) {
    // Reached the end of the linked list.  Start at the beginning.
    if (next_memory_steal_ == NULL) {
      ASSERT(thread_heaps_ != NULL);
      next_memory_steal_ = thread_heaps_;
    }
    if (next_memory_steal_ == this ||
        next_memory_steal_->max_size_ <= kMinThreadCacheSize) {
      continue;
    }
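    // Found a cache with budget to spare: move kStealAmount from it to this
    // cache, then advance the steal cursor for next time.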
    next_memory_steal_->max_size_ -= kStealAmount;
    max_size_ += kStealAmount;

    next_memory_steal_ = next_memory_steal_->next_;
    return;
  }
}

int ThreadCache::GetSamplePeriod() {
  return sampler_.GetSamplePeriod();
}

void ThreadCache::InitModule() {
  SpinLockHolder h(Static::pageheap_lock());
  if (!phinited) {
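    // TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES in the environment stands in for
    // the commented-out flag described at the top of this file.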
    const char *tcb = TCMallocGetenvSafe("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES");
    if (tcb) {
      set_overall_thread_cache_size(strtoll(tcb, NULL, 10));
    }
    Static::InitStaticVars();
    threadcache_allocator.Init();
    phinited = 1;
  }
}

void ThreadCache::InitTSD() {
  ASSERT(!tsd_inited_);
  perftools_pthread_key_create(&heap_key_, DestroyThreadCache);
  tsd_inited_ = true;

#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY
  // We may have used a fake pthread_t for the main thread.  Fix it.
  pthread_t zero;
  memset(&zero, 0, sizeof(zero));
  SpinLockHolder h(Static::pageheap_lock());
  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
    if (h->tid_ == zero) {
      h->tid_ = pthread_self();
    }
  }
#endif
}

ThreadCache* ThreadCache::CreateCacheIfNecessary() {
  // Initialize per-thread data if necessary
  ThreadCache* heap = NULL;
  {
    SpinLockHolder h(Static::pageheap_lock());
    // On some old glibc's, and on freebsd's libc (as of freebsd 8.1),
    // calling pthread routines (even pthread_self) too early could
    // cause a segfault.  Since we can call pthreads quite early, we
    // have to protect against that in such situations by making a
    // 'fake' pthread.  This is not ideal since it doesn't work well
    // when linking tcmalloc statically with apps that create threads
    // before main, so we only do it if we have to.
#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY
    pthread_t me;
    if (!tsd_inited_) {
      memset(&me, 0, sizeof(me));
    } else {
      me = pthread_self();
    }
#else
    const pthread_t me = pthread_self();
#endif

    // This may be a recursive malloc call from pthread_setspecific()
    // In that case, the heap for this thread has already been created
    // and added to the linked list.  So we search for that first.
    for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
      if (h->tid_ == me) {
        heap = h;
        break;
      }
    }

    if (heap == NULL) heap = NewHeap(me);
  }

  // We call pthread_setspecific() outside the lock because it may
  // call malloc() recursively.  We check for the recursive call using
  // the "in_setspecific_" flag so that we can avoid calling
  // pthread_setspecific() if we are already inside pthread_setspecific().
  if (!heap->in_setspecific_ && tsd_inited_) {
    heap->in_setspecific_ = true;
    perftools_pthread_setspecific(heap_key_, heap);
#ifdef HAVE_TLS
    // Also keep a copy in __thread for faster retrieval
    threadlocal_data_.heap = heap;
    SetMinSizeForSlowPath(kMaxSize + 1);
#endif
    heap->in_setspecific_ = false;
  }
  return heap;
}

ThreadCache* ThreadCache::NewHeap(pthread_t tid) {
  // Create the heap and add it to the linked list
  ThreadCache *heap = threadcache_allocator.New();
  heap->Init(tid);
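  // Link the new heap in at the head of the doubly-linked list of thread heaps.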
  heap->next_ = thread_heaps_;
  heap->prev_ = NULL;
  if (thread_heaps_ != NULL) {
    thread_heaps_->prev_ = heap;
  } else {
    // This is the only thread heap at the moment.
    ASSERT(next_memory_steal_ == NULL);
    next_memory_steal_ = heap;
  }
  thread_heaps_ = heap;
  thread_heap_count_++;
  return heap;
}

void ThreadCache::BecomeIdle() {
  if (!tsd_inited_) return;           // No caches yet
  ThreadCache* heap = GetThreadHeap();
  if (heap == NULL) return;           // No thread cache to remove
  if (heap->in_setspecific_) return;  // Do not disturb the active caller

  heap->in_setspecific_ = true;
  perftools_pthread_setspecific(heap_key_, NULL);
#ifdef HAVE_TLS
  // Also update the copy in __thread
  threadlocal_data_.heap = NULL;
  SetMinSizeForSlowPath(0);
#endif
  heap->in_setspecific_ = false;
  if (GetThreadHeap() == heap) {
    // Somehow heap got reinstated by a recursive call to malloc
    // from pthread_setspecific.  We give up in this case.
    return;
  }

  // We can now get rid of the heap
  DeleteCache(heap);
}

void ThreadCache::DestroyThreadCache(void* ptr) {
  // Note that "ptr" cannot be NULL since pthread promises not
  // to invoke the destructor on NULL values, but for safety,
  // we check anyway.
  if (ptr == NULL) return;
#ifdef HAVE_TLS
  // Prevent fast path of GetThreadHeap() from returning heap.
  threadlocal_data_.heap = NULL;
  SetMinSizeForSlowPath(0);
#endif
  DeleteCache(reinterpret_cast<ThreadCache*>(ptr));
}

void ThreadCache::DeleteCache(ThreadCache* heap) {
  // Remove all memory from heap
  heap->Cleanup();

  // Remove from linked list
  SpinLockHolder h(Static::pageheap_lock());
  if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_;
  if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_;
  if (thread_heaps_ == heap) thread_heaps_ = heap->next_;
  thread_heap_count_--;

  if (next_memory_steal_ == heap) next_memory_steal_ = heap->next_;
  if (next_memory_steal_ == NULL) next_memory_steal_ = thread_heaps_;
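  // Return this cache's budget to the unclaimed pool so other threads can claim it.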
  unclaimed_cache_space_ += heap->max_size_;

  threadcache_allocator.Delete(heap);
}

void ThreadCache::RecomputePerThreadCacheSize() {
  // Divide available space across threads
  int n = thread_heap_count_ > 0 ? thread_heap_count_ : 1;
  size_t space = overall_thread_cache_size_ / n;

  // Limit to allowed range
  if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
  if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;

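  // A ratio below 1.0 means the per-thread budget is shrinking, so existing
  // caches are scaled down proportionally in the loop below.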
  double ratio = space / max<double>(1, per_thread_cache_size_);
  size_t claimed = 0;
  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
    // Increasing the total cache size should not circumvent the
    // slow-start growth of max_size_.
    if (ratio < 1.0) {
      h->max_size_ = static_cast<size_t>(h->max_size_ * ratio);
    }
    claimed += h->max_size_;
  }
  unclaimed_cache_space_ = overall_thread_cache_size_ - claimed;
  per_thread_cache_size_ = space;
}

void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) {
  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
    *total_bytes += h->Size();
    if (class_count) {
      for (int cl = 0; cl < kNumClasses; ++cl) {
        class_count[cl] += h->freelist_length(cl);
      }
    }
  }
}

void ThreadCache::set_overall_thread_cache_size(size_t new_size) {
  // Clip the value to a reasonable range
  if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize;
  if (new_size > (1<<30)) new_size = (1<<30);  // Limit to 1GB
  overall_thread_cache_size_ = new_size;

  RecomputePerThreadCacheSize();
}

}  // namespace tcmalloc