// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Ken Ashcraft <opensource@google.com>

#include <config.h>
#include "thread_cache.h"
#include <errno.h>
#include <string.h>                     // for memcpy
#include <algorithm>                    // for max, min
#include "base/commandlineflags.h"      // for SpinLockHolder
#include "base/spinlock.h"              // for SpinLockHolder
#include "getenv_safe.h"                // for TCMallocGetenvSafe
#include "central_freelist.h"           // for CentralFreeListPadded
#include "maybe_threads.h"

using std::min;
using std::max;

// Note: this is initialized manually in InitModule to ensure that
// it's configured at the right time.
//
// DEFINE_int64(tcmalloc_max_total_thread_cache_bytes,
//              EnvToInt64("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES",
//                         kDefaultOverallThreadCacheSize),
//              "Bound on the total amount of bytes allocated to "
//              "thread caches. This bound is not strict, so it is possible "
//              "for the cache to go over this bound in certain circumstances. "
//              "Maximum value of this flag is capped to 1 GB.");


namespace tcmalloc {

static bool phinited = false;

volatile size_t ThreadCache::per_thread_cache_size_ = kMaxThreadCacheSize;
size_t ThreadCache::overall_thread_cache_size_ = kDefaultOverallThreadCacheSize;
ssize_t ThreadCache::unclaimed_cache_space_ = kDefaultOverallThreadCacheSize;
PageHeapAllocator<ThreadCache> threadcache_allocator;
ThreadCache* ThreadCache::thread_heaps_ = NULL;
int ThreadCache::thread_heap_count_ = 0;
ThreadCache* ThreadCache::next_memory_steal_ = NULL;
#ifdef HAVE_TLS
__thread ThreadCache::ThreadLocalData ThreadCache::threadlocal_data_
    ATTR_INITIAL_EXEC
    = {0, 0};
#endif
bool ThreadCache::tsd_inited_ = false;
pthread_key_t ThreadCache::heap_key_;

void ThreadCache::Init(pthread_t tid) {
  size_ = 0;

  max_size_ = 0;
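  // Claim an initial cache budget: IncreaseCacheLimitLocked() takes space from
  // the unclaimed pool, or steals some from another thread's cache if the pool
  // is exhausted.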
  IncreaseCacheLimitLocked();
  if (max_size_ == 0) {
    // There isn't enough memory to go around.  Just give the minimum to
    // this thread.
    max_size_ = kMinThreadCacheSize;

    // Take unclaimed_cache_space_ negative.
    unclaimed_cache_space_ -= kMinThreadCacheSize;
    ASSERT(unclaimed_cache_space_ < 0);
  }

  next_ = NULL;
  prev_ = NULL;
  tid_ = tid;
  in_setspecific_ = false;
  for (size_t cl = 0; cl < kNumClasses; ++cl) {
    list_[cl].Init();
  }

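  // Seed the per-thread sampler with bits taken from the thread id.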
  uint32_t sampler_seed;
  memcpy(&sampler_seed, &tid, sizeof(sampler_seed));
  sampler_.Init(sampler_seed);
}

void ThreadCache::Cleanup() {
  // Put unused memory back into central cache
  for (int cl = 0; cl < kNumClasses; ++cl) {
    if (list_[cl].length() > 0) {
      ReleaseToCentralCache(&list_[cl], cl, list_[cl].length());
    }
  }
}

// Remove some objects of class "cl" from central cache and add to thread heap.
// On success, return the first object for immediate use; otherwise return NULL.
void* ThreadCache::FetchFromCentralCache(size_t cl, size_t byte_size) {
  FreeList* list = &list_[cl];
  ASSERT(list->empty());
  const int batch_size = Static::sizemap()->num_objects_to_move(cl);

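  // Fetch up to one batch, but no more than the list's current max_length allows.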
  const int num_to_move = min<int>(list->max_length(), batch_size);
  void *start, *end;
  int fetch_count = Static::central_cache()[cl].RemoveRange(
      &start, &end, num_to_move);

  ASSERT((start == NULL) == (fetch_count == 0));
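  // Hand the first fetched object back to the caller; the remaining
  // fetch_count objects are pushed onto the free list.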
  if (--fetch_count >= 0) {
    size_ += byte_size * fetch_count;
    list->PushRange(fetch_count, SLL_Next(start), end);
  }

  // Increase max length slowly up to batch_size.  After that,
  // increase by batch_size in one shot so that the length is a
  // multiple of batch_size.
  if (list->max_length() < batch_size) {
    list->set_max_length(list->max_length() + 1);
  } else {
    // Don't let the list get too long.  In 32 bit builds, the length
    // is represented by a 16 bit int, so we need to watch out for
    // integer overflow.
    int new_length = min<int>(list->max_length() + batch_size,
                              kMaxDynamicFreeListLength);
    // The list's max_length must always be a multiple of batch_size,
    // and kMaxDynamicFreeListLength is not necessarily a multiple
    // of batch_size.
    new_length -= new_length % batch_size;
    ASSERT(new_length % batch_size == 0);
    list->set_max_length(new_length);
  }
  return start;
}

void ThreadCache::ListTooLong(FreeList* list, size_t cl) {
  const int batch_size = Static::sizemap()->num_objects_to_move(cl);
  ReleaseToCentralCache(list, cl, batch_size);

  // If the list is too long, we need to transfer some number of
  // objects to the central cache.  Ideally, we would transfer
  // num_objects_to_move, so the code below tries to make max_length
  // converge on num_objects_to_move.

  if (list->max_length() < batch_size) {
    // Slow start the max_length so we don't overreserve.
    list->set_max_length(list->max_length() + 1);
  } else if (list->max_length() > batch_size) {
    // If we consistently go over max_length, shrink max_length.  If we don't
    // shrink it, some amount of memory will always stay in this freelist.
    list->set_length_overages(list->length_overages() + 1);
    if (list->length_overages() > kMaxOverages) {
      ASSERT(list->max_length() > batch_size);
      list->set_max_length(list->max_length() - batch_size);
      list->set_length_overages(0);
    }
  }
}

// Remove some objects of class "cl" from thread heap and add to central cache
void ThreadCache::ReleaseToCentralCache(FreeList* src, size_t cl, int N) {
  ASSERT(src == &list_[cl]);
  if (N > src->length()) N = src->length();
  size_t delta_bytes = N * Static::sizemap()->ByteSizeForClass(cl);

  // We return prepackaged chains of the correct size to the central cache.
  // TODO: Use the same format internally in the thread caches?
  int batch_size = Static::sizemap()->num_objects_to_move(cl);
  while (N > batch_size) {
    void *tail, *head;
    src->PopRange(batch_size, &head, &tail);
    Static::central_cache()[cl].InsertRange(head, tail, batch_size);
    N -= batch_size;
  }
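  // Release whatever remains (at most batch_size objects) as one final range.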
  void *tail, *head;
  src->PopRange(N, &head, &tail);
  Static::central_cache()[cl].InsertRange(head, tail, N);
  size_ -= delta_bytes;
}

// Release idle memory to the central cache
void ThreadCache::Scavenge() {
  // If the low-water mark for the free list is L, it means we would
  // not have had to allocate anything from the central cache even if
  // we had reduced the free list size by L.  We aim to get closer to
  // that situation by dropping L/2 nodes from the free list.  This
  // may not release much memory, but if so we will call scavenge again
  // pretty soon and the low-water marks will be high on that call.
  //int64 start = CycleClock::Now();
  for (int cl = 0; cl < kNumClasses; cl++) {
    FreeList* list = &list_[cl];
    const int lowmark = list->lowwatermark();
    if (lowmark > 0) {
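      // Drop about half of the low-water mark, but always at least one object.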
      const int drop = (lowmark > 1) ? lowmark/2 : 1;
      ReleaseToCentralCache(list, cl, drop);

      // Shrink the max length if it isn't used.  Only shrink down to
      // batch_size -- if the thread was active enough to get the max_length
      // above batch_size, it will likely be that active again.  If
      // max_length shrinks below batch_size, the thread will have to
      // go through the slow-start behavior again.  The slow-start is useful
      // mainly for threads that stay relatively idle for their entire
      // lifetime.
      const int batch_size = Static::sizemap()->num_objects_to_move(cl);
      if (list->max_length() > batch_size) {
        list->set_max_length(
            max<int>(list->max_length() - batch_size, batch_size));
      }
    }
    list->clear_lowwatermark();
  }

  IncreaseCacheLimit();
}

void ThreadCache::IncreaseCacheLimit() {
  SpinLockHolder h(Static::pageheap_lock());
  IncreaseCacheLimitLocked();
}

void ThreadCache::IncreaseCacheLimitLocked() {
  if (unclaimed_cache_space_ > 0) {
    // Possibly make unclaimed_cache_space_ negative.
    unclaimed_cache_space_ -= kStealAmount;
    max_size_ += kStealAmount;
    return;
  }
  // Don't hold pageheap_lock too long.  Try to steal from 10 other
  // threads before giving up.  The i < 10 condition also prevents an
  // infinite loop in case none of the existing thread heaps are
  // suitable places to steal from.
  for (int i = 0; i < 10;
       ++i, next_memory_steal_ = next_memory_steal_->next_) {
    // Reached the end of the linked list.  Start at the beginning.
    if (next_memory_steal_ == NULL) {
      ASSERT(thread_heaps_ != NULL);
      next_memory_steal_ = thread_heaps_;
    }
    if (next_memory_steal_ == this ||
        next_memory_steal_->max_size_ <= kMinThreadCacheSize) {
      continue;
    }
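    // Found a cache with budget to spare: move kStealAmount from it to this
    // cache, then advance the steal cursor for next time.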
    next_memory_steal_->max_size_ -= kStealAmount;
    max_size_ += kStealAmount;

    next_memory_steal_ = next_memory_steal_->next_;
    return;
  }
}

int ThreadCache::GetSamplePeriod() {
  return sampler_.GetSamplePeriod();
}

void ThreadCache::InitModule() {
  SpinLockHolder h(Static::pageheap_lock());
  if (!phinited) {
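    // TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES in the environment stands in for
    // the commented-out flag described at the top of this file.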
    const char *tcb = TCMallocGetenvSafe("TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES");
    if (tcb) {
      set_overall_thread_cache_size(strtoll(tcb, NULL, 10));
    }
    Static::InitStaticVars();
    threadcache_allocator.Init();
    phinited = 1;
  }
}

void ThreadCache::InitTSD() {
  ASSERT(!tsd_inited_);
  perftools_pthread_key_create(&heap_key_, DestroyThreadCache);
  tsd_inited_ = true;

#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY
  // We may have used a fake pthread_t for the main thread.  Fix it.
  pthread_t zero;
  memset(&zero, 0, sizeof(zero));
  SpinLockHolder h(Static::pageheap_lock());
  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
    if (h->tid_ == zero) {
      h->tid_ = pthread_self();
    }
  }
#endif
}

ThreadCache* ThreadCache::CreateCacheIfNecessary() {
  // Initialize per-thread data if necessary
  ThreadCache* heap = NULL;
  {
    SpinLockHolder h(Static::pageheap_lock());
    // On some old glibc's, and on freebsd's libc (as of freebsd 8.1),
    // calling pthread routines (even pthread_self) too early could
    // cause a segfault.  Since we can call pthreads quite early, we
    // have to protect against that in such situations by making a
    // 'fake' pthread.  This is not ideal since it doesn't work well
    // when linking tcmalloc statically with apps that create threads
    // before main, so we only do it if we have to.
#ifdef PTHREADS_CRASHES_IF_RUN_TOO_EARLY
    pthread_t me;
    if (!tsd_inited_) {
      memset(&me, 0, sizeof(me));
    } else {
      me = pthread_self();
    }
#else
    const pthread_t me = pthread_self();
#endif

    // This may be a recursive malloc call from pthread_setspecific()
    // In that case, the heap for this thread has already been created
    // and added to the linked list.  So we search for that first.
    for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
      if (h->tid_ == me) {
        heap = h;
        break;
      }
    }

    if (heap == NULL) heap = NewHeap(me);
  }

  // We call pthread_setspecific() outside the lock because it may
  // call malloc() recursively.  We check for the recursive call using
  // the "in_setspecific_" flag so that we can avoid calling
  // pthread_setspecific() if we are already inside pthread_setspecific().
  if (!heap->in_setspecific_ && tsd_inited_) {
    heap->in_setspecific_ = true;
    perftools_pthread_setspecific(heap_key_, heap);
#ifdef HAVE_TLS
    // Also keep a copy in __thread for faster retrieval
    threadlocal_data_.heap = heap;
    SetMinSizeForSlowPath(kMaxSize + 1);
#endif
    heap->in_setspecific_ = false;
  }
  return heap;
}

ThreadCache* ThreadCache::NewHeap(pthread_t tid) {
  // Create the heap and add it to the linked list
  ThreadCache *heap = threadcache_allocator.New();
  heap->Init(tid);
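  // Link the new heap in at the head of the doubly-linked list of thread heaps.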
  heap->next_ = thread_heaps_;
  heap->prev_ = NULL;
  if (thread_heaps_ != NULL) {
    thread_heaps_->prev_ = heap;
  } else {
    // This is the only thread heap at the moment.
    ASSERT(next_memory_steal_ == NULL);
    next_memory_steal_ = heap;
  }
  thread_heaps_ = heap;
  thread_heap_count_++;
  return heap;
}

void ThreadCache::BecomeIdle() {
  if (!tsd_inited_) return;           // No caches yet
  ThreadCache* heap = GetThreadHeap();
  if (heap == NULL) return;           // No thread cache to remove
  if (heap->in_setspecific_) return;  // Do not disturb the active caller

  heap->in_setspecific_ = true;
  perftools_pthread_setspecific(heap_key_, NULL);
#ifdef HAVE_TLS
  // Also update the copy in __thread
  threadlocal_data_.heap = NULL;
  SetMinSizeForSlowPath(0);
#endif
  heap->in_setspecific_ = false;
  if (GetThreadHeap() == heap) {
    // Somehow heap got reinstated by a recursive call to malloc
    // from pthread_setspecific.  We give up in this case.
    return;
  }

  // We can now get rid of the heap
  DeleteCache(heap);
}

void ThreadCache::DestroyThreadCache(void* ptr) {
  // Note that "ptr" cannot be NULL since pthread promises not
  // to invoke the destructor on NULL values, but for safety,
  // we check anyway.
  if (ptr == NULL) return;
#ifdef HAVE_TLS
  // Prevent fast path of GetThreadHeap() from returning heap.
  threadlocal_data_.heap = NULL;
  SetMinSizeForSlowPath(0);
#endif
  DeleteCache(reinterpret_cast<ThreadCache*>(ptr));
}

void ThreadCache::DeleteCache(ThreadCache* heap) {
  // Remove all memory from heap
  heap->Cleanup();

  // Remove from linked list
  SpinLockHolder h(Static::pageheap_lock());
  if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_;
  if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_;
  if (thread_heaps_ == heap) thread_heaps_ = heap->next_;
  thread_heap_count_--;

  if (next_memory_steal_ == heap) next_memory_steal_ = heap->next_;
  if (next_memory_steal_ == NULL) next_memory_steal_ = thread_heaps_;
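  // Return this cache's budget to the unclaimed pool so other threads can claim it.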
  unclaimed_cache_space_ += heap->max_size_;

  threadcache_allocator.Delete(heap);
}

void ThreadCache::RecomputePerThreadCacheSize() {
  // Divide available space across threads
  int n = thread_heap_count_ > 0 ? thread_heap_count_ : 1;
  size_t space = overall_thread_cache_size_ / n;

  // Limit to allowed range
  if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
  if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;

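  // A ratio below 1.0 means the per-thread budget is shrinking, so existing
  // caches are scaled down proportionally in the loop below.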
  double ratio = space / max<double>(1, per_thread_cache_size_);
  size_t claimed = 0;
  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
    // Increasing the total cache size should not circumvent the
    // slow-start growth of max_size_.
    if (ratio < 1.0) {
      h->max_size_ = static_cast<size_t>(h->max_size_ * ratio);
    }
    claimed += h->max_size_;
  }
  unclaimed_cache_space_ = overall_thread_cache_size_ - claimed;
  per_thread_cache_size_ = space;
}

void ThreadCache::GetThreadStats(uint64_t* total_bytes, uint64_t* class_count) {
  for (ThreadCache* h = thread_heaps_; h != NULL; h = h->next_) {
    *total_bytes += h->Size();
    if (class_count) {
      for (int cl = 0; cl < kNumClasses; ++cl) {
        class_count[cl] += h->freelist_length(cl);
      }
    }
  }
}

void ThreadCache::set_overall_thread_cache_size(size_t new_size) {
  // Clip the value to a reasonable range
  if (new_size < kMinThreadCacheSize) new_size = kMinThreadCacheSize;
  if (new_size > (1<<30)) new_size = (1<<30);  // Limit to 1GB
  overall_thread_cache_size_ = new_size;

  RecomputePerThreadCacheSize();
}

}  // namespace tcmalloc