blob: f8be15267a4fa2f0d899ace86c0f55a3392cb0d8 [file] [log] [blame]
Austin Schuh745610d2015-09-06 18:19:50 -07001// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
2// Copyright (c) 2008, Google Inc.
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// ---
32// Author: Sanjay Ghemawat <opensource@google.com>
33
34#ifndef TCMALLOC_THREAD_CACHE_H_
35#define TCMALLOC_THREAD_CACHE_H_
36
37#include <config.h>
38#ifdef HAVE_PTHREAD
39#include <pthread.h> // for pthread_t, pthread_key_t
40#endif
41#include <stddef.h> // for size_t, NULL
42#ifdef HAVE_STDINT_H
43#include <stdint.h> // for uint32_t, uint64_t
44#endif
45#include <sys/types.h> // for ssize_t
Brian Silverman20350ac2021-11-17 18:19:55 -080046#include "base/commandlineflags.h"
Austin Schuh745610d2015-09-06 18:19:50 -070047#include "common.h"
48#include "linked_list.h"
49#include "maybe_threads.h"
50#include "page_heap_allocator.h"
51#include "sampler.h"
52#include "static_vars.h"
53
54#include "common.h" // for SizeMap, kMaxSize, etc
55#include "internal_logging.h" // for ASSERT, etc
56#include "linked_list.h" // for SLL_Pop, SLL_PopRange, etc
57#include "page_heap_allocator.h" // for PageHeapAllocator
58#include "sampler.h" // for Sampler
59#include "static_vars.h" // for Static
60
// Heap-sampling interval flag, defined elsewhere (see DECLARE vs DEFINE in
// base/commandlineflags.h).  NOTE(review): presumably the average number of
// bytes between sampled allocations -- confirm against sampler.cc.
DECLARE_int64(tcmalloc_sample_parameter);

Austin Schuh745610d2015-09-06 18:19:50 -070063namespace tcmalloc {
64
65//-------------------------------------------------------------------
66// Data kept per thread
67//-------------------------------------------------------------------
68
// A ThreadCache is the per-thread front end of tcmalloc: each thread
// owns one, and satisfies most allocations/deallocations from the
// thread-local free lists in list_[] without taking any lock.  When a
// list runs dry it refills from the central cache; when the cache grows
// past max_size_ it scavenges back to the central cache.
class ThreadCache {
 public:
#ifdef HAVE_TLS
  enum { have_tls = true };
#else
  enum { have_tls = false };
#endif

  // Set up / tear down this cache for the owning thread |tid|.
  // Implementations live in the .cc file.
  void Init(pthread_t tid);
  void Cleanup();

  // Accessors (mostly just for printing stats)
  int freelist_length(uint32 cl) const { return list_[cl].length(); }

  // Total byte size in cache
  size_t Size() const { return size_; }

  // Allocate an object of the given size and class.  The size given
  // must be the same as the size of the class in the size map.
  // oom_handler is invoked (via FetchFromCentralCache) if memory cannot
  // be obtained.
  void* Allocate(size_t size, uint32 cl, void *(*oom_handler)(size_t size));
  // Return |ptr| (an object of the given size class) to this cache.
  void Deallocate(void* ptr, uint32 size_class);

  // Release cached memory back to the central cache until size_ is
  // acceptable again.  Defined in the .cc file.
  void Scavenge();

  int GetSamplePeriod();

  // Record allocation of "k" bytes.  Return true iff allocation
  // should be sampled
  bool SampleAllocation(size_t k);

  // Fast-path variant: returns true when the allocation of "k" bytes
  // was recorded without needing the slow sampling path.
  bool TryRecordAllocationFast(size_t k);

  // One-time global setup; TSD key creation is split out because malloc
  // can be called before pthread TSD is usable (see tsd_inited_ below).
  static void InitModule();
  static void InitTSD();
  static ThreadCache* GetThreadHeap();
  static ThreadCache* GetCache();
  static ThreadCache* GetCacheIfPresent();
  static ThreadCache* GetFastPathCache();
  static ThreadCache* GetCacheWhichMustBePresent();
  static ThreadCache* CreateCacheIfNecessary();
  static void BecomeIdle();
  static void BecomeTemporarilyIdle();
  // Emergency-malloc mode: disables the fast-path heap so allocations
  // are diverted (used while the normal path is unusable).
  static void SetUseEmergencyMalloc();
  static void ResetUseEmergencyMalloc();
  static bool IsUseEmergencyMalloc();

  // Return the number of thread heaps in use.
  static inline int HeapsInUse();

  // Adds to *total_bytes the total number of bytes used by all thread heaps.
  // Also, if class_count is not NULL, it must be an array of size kNumClasses,
  // and this function will increment each element of class_count by the number
  // of items in all thread-local freelists of the corresponding size class.
  // REQUIRES: Static::pageheap_lock is held.
  static void GetThreadStats(uint64_t* total_bytes, uint64_t* class_count);

  // Sets the total thread cache size to new_size, recomputing the
  // individual thread cache sizes as necessary.
  // REQUIRES: Static::pageheap lock is held.
  static void set_overall_thread_cache_size(size_t new_size);
  static size_t overall_thread_cache_size() {
    return overall_thread_cache_size_;
  }

 private:
  // A singly linked free list of objects of one size class, with
  // bookkeeping (length, low-water mark, adaptive max length) used by
  // the scavenging heuristics.
  class FreeList {
   private:
    void*    list_;       // Linked list of nodes

#ifdef _LP64
    // On 64-bit hardware, manipulating 16-bit values may be slightly slow.
    uint32_t length_;      // Current length.
    uint32_t lowater_;     // Low water mark for list length.
    uint32_t max_length_;  // Dynamic max list length based on usage.
    // Tracks the number of times a deallocation has caused
    // length_ > max_length_.  After the kMaxOverages'th time, max_length_
    // shrinks and length_overages_ is reset to zero.
    uint32_t length_overages_;
#else
    // If we aren't using 64-bit pointers then pack these into less space.
    uint16_t length_;
    uint16_t lowater_;
    uint16_t max_length_;
    uint16_t length_overages_;
#endif

    // Byte size of each object on this list (the size class's size).
    int32_t size_;

   public:
    void Init(size_t size) {
      list_ = NULL;
      length_ = 0;
      lowater_ = 0;
      max_length_ = 1;      // grows adaptively; see ListTooLong
      length_overages_ = 0;
      size_ = size;
    }

    // Return current length of list
    size_t length() const {
      return length_;
    }

    // Byte size of the objects held by this list.
    int32_t object_size() const {
      return size_;
    }

    // Return the maximum length of the list.
    size_t max_length() const {
      return max_length_;
    }

    // Set the maximum length of the list.  If 'new_max' > length(), the
    // client is responsible for removing objects from the list.
    void set_max_length(size_t new_max) {
      max_length_ = new_max;
    }

    // Return the number of times that length() has gone over max_length().
    size_t length_overages() const {
      return length_overages_;
    }

    void set_length_overages(size_t new_count) {
      length_overages_ = new_count;
    }

    // Is list empty?
    bool empty() const {
      return list_ == NULL;
    }

    // Low-water mark management
    int lowwatermark() const { return lowater_; }
    void clear_lowwatermark() { lowater_ = length_; }

    // Push |ptr| and return the new length (computed before the store so
    // the caller gets a consistent value).
    uint32_t Push(void* ptr) {
      uint32_t length = length_ + 1;
      SLL_Push(&list_, ptr);
      length_ = length;
      return length;
    }

    void* Pop() {
      ASSERT(list_ != NULL);
      length_--;
      if (length_ < lowater_) lowater_ = length_;
      return SLL_Pop(&list_);
    }

    // Pop into *rv; returns false when the list is empty.
    bool TryPop(void **rv) {
      if (SLL_TryPop(&list_, rv)) {
        length_--;
        if (PREDICT_FALSE(length_ < lowater_)) lowater_ = length_;
        return true;
      }
      return false;
    }

    // Peek at the head object without removing it.
    void* Next() {
      return SLL_Next(&list_);
    }

    // Splice a pre-linked chain of N nodes [start..end] onto the list.
    void PushRange(int N, void *start, void *end) {
      SLL_PushRange(&list_, start, end);
      length_ += N;
    }

    // Remove N nodes, returned as the chain [*start..*end].
    // REQUIRES: length() >= N.
    void PopRange(int N, void **start, void **end) {
      SLL_PopRange(&list_, N, start, end);
      ASSERT(length_ >= N);
      length_ -= N;
      if (length_ < lowater_) lowater_ = length_;
    }
  };

  // Gets and returns an object from the central cache, and, if possible,
  // also adds some objects of that size class to this thread cache.
  void* FetchFromCentralCache(uint32 cl, int32_t byte_size,
                              void *(*oom_handler)(size_t size));

  void ListTooLong(void* ptr, uint32 cl);

  // Releases some number of items from src.  Adjusts the list's max_length
  // to eventually converge on num_objects_to_move(cl).
  void ListTooLong(FreeList* src, uint32 cl);

  // Releases N items from this thread cache.
  void ReleaseToCentralCache(FreeList* src, uint32 cl, int N);

  void SetMaxSize(int32 new_max_size);

  // Increase max_size_ by reducing unclaimed_cache_space_ or by
  // reducing the max_size_ of some other thread.  In both cases,
  // the delta is kStealAmount.
  void IncreaseCacheLimit();
  // Same as above but requires Static::pageheap_lock() is held.
  void IncreaseCacheLimitLocked();

  // If TLS is available, we also store a copy of the per-thread object
  // in a __thread variable since __thread variables are faster to read
  // than pthread_getspecific().  We still need pthread_setspecific()
  // because __thread variables provide no way to run cleanup code when
  // a thread is destroyed.
  // We also give a hint to the compiler to use the "initial exec" TLS
  // model.  This is faster than the default TLS model, at the cost that
  // you cannot dlopen this library.  (To see the difference, look at
  // the CPU use of __tls_get_addr with and without this attribute.)
  // Since we don't really use dlopen in google code -- and using dlopen
  // on a malloc replacement is asking for trouble in any case -- that's
  // a good tradeoff for us.
#ifdef HAVE_TLS
  struct ThreadLocalData {
    ThreadCache* fast_path_heap;  // NULL while emergency malloc is active
    ThreadCache* heap;
    bool use_emergency_malloc;
  };
  static __thread ThreadLocalData threadlocal_data_
    CACHELINE_ALIGNED ATTR_INITIAL_EXEC;

#endif

  // Thread-specific key.  Initialization here is somewhat tricky
  // because some Linux startup code invokes malloc() before it
  // is in a good enough state to handle pthread_keycreate().
  // Therefore, we use TSD keys only after tsd_inited is set to true.
  // Until then, we use a slow path to get the heap object.
  static ATTRIBUTE_HIDDEN bool tsd_inited_;
  static pthread_key_t heap_key_;

  // Linked list of heap objects.  Protected by Static::pageheap_lock.
  static ThreadCache* thread_heaps_;
  static int thread_heap_count_;

  // A pointer to one of the objects in thread_heaps_.  Represents
  // the next ThreadCache from which a thread over its max_size_ should
  // steal memory limit.  Round-robin through all of the objects in
  // thread_heaps_.  Protected by Static::pageheap_lock.
  static ThreadCache* next_memory_steal_;

  // Overall thread cache size.  Protected by Static::pageheap_lock.
  static size_t overall_thread_cache_size_;

  // Global per-thread cache size.  Writes are protected by
  // Static::pageheap_lock.  Reads are done without any locking, which should be
  // fine as long as size_t can be written atomically and we don't place
  // invariants between this variable and other pieces of state.
  static volatile size_t per_thread_cache_size_;

  // Represents overall_thread_cache_size_ minus the sum of max_size_
  // across all ThreadCaches.  Protected by Static::pageheap_lock.
  static ssize_t unclaimed_cache_space_;

  // This class is laid out with the most frequently used fields
  // first so that hot elements are placed on the same cache line.

  FreeList      list_[kClassSizesMax];     // Array indexed by size-class

  int32         size_;                     // Combined size of data
  int32         max_size_;                 // size_ > max_size_ --> Scavenge()

  // We sample allocations, biased by the size of the allocation
  Sampler       sampler_;                  // A sampler

  pthread_t     tid_;                      // Which thread owns it
  bool          in_setspecific_;           // In call to pthread_setspecific?

  // Allocate a new heap.  REQUIRES: Static::pageheap_lock is held.
  static ThreadCache* NewHeap(pthread_t tid);

  // Use only as pthread thread-specific destructor function.
  static void DestroyThreadCache(void* ptr);

  static void DeleteCache(ThreadCache* heap);
  static void RecomputePerThreadCacheSize();

public:

  // All ThreadCache objects are kept in a linked list (for stats collection)
  ThreadCache* next_;
  ThreadCache* prev_;

  // Ensure that this class is cacheline-aligned.  This is critical for
  // performance, as false sharing would negate many of the benefits
  // of a per-thread cache.
} CACHELINE_ALIGNED;
355
// Allocator for thread heaps.  The single instance is defined elsewhere
// (this is only an extern declaration).
// This is logically part of the ThreadCache class, but MSVC, at
// least, does not like using ThreadCache as a template argument
// before the class is fully defined.  So we put it outside the class.
extern PageHeapAllocator<ThreadCache> threadcache_allocator;
361
362inline int ThreadCache::HeapsInUse() {
363 return threadcache_allocator.inuse();
364}
365
// Fast-path allocation: pop one object of size class |cl| from the
// thread-local free list; fall back to the central cache when the list
// is empty.  |size| must equal the byte size of class |cl|.
inline ATTRIBUTE_ALWAYS_INLINE void* ThreadCache::Allocate(
    size_t size, uint32 cl, void *(*oom_handler)(size_t size)) {
  FreeList* list = &list_[cl];

#ifdef NO_TCMALLOC_SAMPLES
  // With sampling compiled out, the caller's size is ignored and the
  // canonical per-class size stored on the list is used instead.
  size = list->object_size();
#endif

  ASSERT(size <= kMaxSize);
  ASSERT(size != 0);
  // NOTE(review): the "size == 0 ||" guard is redundant given the
  // previous assert; kept as-is.
  ASSERT(size == 0 || size == Static::sizemap()->ByteSizeForClass(cl));

  void* rv;
  if (!list->TryPop(&rv)) {
    // Miss: refill from the central cache (may invoke oom_handler).
    // size_ is not adjusted here; the refill path does its own accounting.
    return FetchFromCentralCache(cl, size, oom_handler);
  }
  // Hit: the cache now holds |size| fewer bytes.
  size_ -= size;
  return rv;
}
385
// Fast-path deallocation: push |ptr| onto the free list for class |cl|,
// then trigger list trimming or a full scavenge if limits are exceeded.
inline ATTRIBUTE_ALWAYS_INLINE void ThreadCache::Deallocate(void* ptr, uint32 cl) {
  ASSERT(list_[cl].max_length() > 0);
  FreeList* list = &list_[cl];

  // This catches back-to-back frees of allocs in the same size
  // class.  A more comprehensive (and expensive) test would be to walk
  // the entire freelist.  But this might be enough to find some bugs.
  ASSERT(ptr != list->Next());

  uint32_t length = list->Push(ptr);

  if (PREDICT_FALSE(length > list->max_length())) {
    // List grew past its adaptive cap: hand excess objects back.
    // NOTE(review): size_ is not incremented on this path, so ListTooLong
    // presumably performs the byte accounting itself -- confirm in the .cc.
    ListTooLong(list, cl);
    return;
  }

  size_ += list->object_size();
  if (PREDICT_FALSE(size_ > max_size_)){
    // Whole cache over budget: release memory back to the central cache.
    Scavenge();
  }
}
407
// Return this thread's cache, or NULL if none has been created yet.
// Uses the fast __thread copy when TLS is available, otherwise falls
// back to pthread TSD.
inline ThreadCache* ThreadCache::GetThreadHeap() {
#ifdef HAVE_TLS
  return threadlocal_data_.heap;
#else
  return reinterpret_cast<ThreadCache *>(
      perftools_pthread_getspecific(heap_key_));
#endif
}
416
417inline ThreadCache* ThreadCache::GetCacheWhichMustBePresent() {
418#ifdef HAVE_TLS
419 ASSERT(threadlocal_data_.heap);
420 return threadlocal_data_.heap;
421#else
422 ASSERT(perftools_pthread_getspecific(heap_key_));
423 return reinterpret_cast<ThreadCache *>(
424 perftools_pthread_getspecific(heap_key_));
425#endif
426}
427
// Return this thread's cache, creating it on first use.  This is the
// slow-but-safe accessor used by allocation paths that must succeed.
inline ThreadCache* ThreadCache::GetCache() {
#ifdef HAVE_TLS
  ThreadCache* ptr = GetThreadHeap();
#else
  // Without TLS we must not touch pthread TSD before InitTSD() has run
  // (startup code can call malloc before TSD is usable).
  ThreadCache* ptr = NULL;
  if (PREDICT_TRUE(tsd_inited_)) {
    ptr = GetThreadHeap();
  }
#endif
  if (ptr == NULL) ptr = CreateCacheIfNecessary();
  return ptr;
}
440
// In deletion paths, we do not try to create a thread-cache.  This is
// because we may be in the thread destruction code and may have
// already cleaned up the cache for this thread.
// Returns NULL when no cache exists (or, without TLS, before TSD init).
inline ThreadCache* ThreadCache::GetCacheIfPresent() {
#ifndef HAVE_TLS
  if (PREDICT_FALSE(!tsd_inited_)) return NULL;
#endif
  return GetThreadHeap();
}
450
// Return the cache used by the allocation fast path.  With TLS this is
// the dedicated fast_path_heap slot, which is NULL-ed out while
// emergency malloc is active (see SetUseEmergencyMalloc), so callers
// getting NULL fall back to the slow path.
inline ThreadCache* ThreadCache::GetFastPathCache() {
#ifndef HAVE_TLS
  return GetCacheIfPresent();
#else
  return threadlocal_data_.fast_path_heap;
#endif
}
458
// Enter emergency-malloc mode for this thread: clearing fast_path_heap
// makes GetFastPathCache() return NULL, diverting allocations off the
// fast path.  No-op without TLS.
inline void ThreadCache::SetUseEmergencyMalloc() {
#ifdef HAVE_TLS
  threadlocal_data_.fast_path_heap = NULL;
  threadlocal_data_.use_emergency_malloc = true;
#endif
}
465
Brian Silverman20350ac2021-11-17 18:19:55 -0800466inline void ThreadCache::ResetUseEmergencyMalloc() {
467#ifdef HAVE_TLS
468 ThreadCache *heap = threadlocal_data_.heap;
469 threadlocal_data_.fast_path_heap = heap;
470 threadlocal_data_.use_emergency_malloc = false;
471#endif
472}
473
// True iff this thread is currently in emergency-malloc mode.  Always
// false unless both TLS and emergency malloc are compiled in.
inline bool ThreadCache::IsUseEmergencyMalloc() {
#if defined(HAVE_TLS) && defined(ENABLE_EMERGENCY_MALLOC)
  return PREDICT_FALSE(threadlocal_data_.use_emergency_malloc);
#else
  return false;
#endif
}
481
// Set this cache's byte budget; exceeding it triggers Scavenge() on the
// deallocation path.
inline void ThreadCache::SetMaxSize(int32 new_max_size) {
  max_size_ = new_max_size;
}
485
#ifndef NO_TCMALLOC_SAMPLES

// Sampling enabled: defer to the per-thread Sampler.
// NOTE(review): the negation implies RecordAllocation() returns false
// when the allocation should be sampled -- confirm against sampler.h.
inline bool ThreadCache::SampleAllocation(size_t k) {
  return !sampler_.RecordAllocation(k);
}

inline bool ThreadCache::TryRecordAllocationFast(size_t k) {
  return sampler_.TryRecordAllocationFast(k);
}

#else

// Sampling compiled out: never sample...
inline bool ThreadCache::SampleAllocation(size_t k) {
  return false;
}

// ...and every allocation is trivially "recorded" on the fast path.
inline bool ThreadCache::TryRecordAllocationFast(size_t k) {
  return true;
}

#endif
507
Austin Schuh745610d2015-09-06 18:19:50 -0700508} // namespace tcmalloc
509
510#endif // TCMALLOC_THREAD_CACHE_H_