Blame - third_party/gperftools/src/common.h - RealtimeRoboticsGroup/test

blob: c3484d37f094a856ede4a07a562a5ff05eb084c3 [file] [log] [blame]

Austin Schuh	745610d	2015-09-06 18:19:50 -0700	[diff] [blame]	1	// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
				2	// Copyright (c) 2008, Google Inc.
				3	// All rights reserved.
				4	//
				5	// Redistribution and use in source and binary forms, with or without
				6	// modification, are permitted provided that the following conditions are
				7	// met:
				8	//
				9	// * Redistributions of source code must retain the above copyright
				10	// notice, this list of conditions and the following disclaimer.
				11	// * Redistributions in binary form must reproduce the above
				12	// copyright notice, this list of conditions and the following disclaimer
				13	// in the documentation and/or other materials provided with the
				14	// distribution.
				15	// * Neither the name of Google Inc. nor the names of its
				16	// contributors may be used to endorse or promote products derived from
				17	// this software without specific prior written permission.
				18	//
				19	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				20	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				21	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				22	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				23	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				24	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				25	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				26	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				27	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				28	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				29	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				30
				31	// ---
				32	// Author: Sanjay Ghemawat <opensource@google.com>
				33	//
				34	// Common definitions for tcmalloc code.
				35
				36	#ifndef TCMALLOC_COMMON_H_
				37	#define TCMALLOC_COMMON_H_
				38
				39	#include "config.h"
				40	#include <stddef.h> // for size_t
				41	#ifdef HAVE_STDINT_H
				42	#include <stdint.h> // for uintptr_t, uint64_t
				43	#endif
				44	#include "internal_logging.h" // for ASSERT, etc
				45	#include "base/basictypes.h" // for LIKELY, etc
				46
				47	#ifdef HAVE_BUILTIN_EXPECT
				48	#define LIKELY(x) __builtin_expect(!!(x), 1)
				49	#define UNLIKELY(x) __builtin_expect(!!(x), 0)
				50	#else
				51	#define LIKELY(x) (x)
				52	#define UNLIKELY(x) (x)
				53	#endif
				54
				55	// Type that can hold a page number
				56	typedef uintptr_t PageID;
				57
				58	// Type that can hold the length of a run of pages
				59	typedef uintptr_t Length;
				60
				61	//-------------------------------------------------------------------
				62	// Configuration
				63	//-------------------------------------------------------------------
				64
				65	#if defined(TCMALLOC_ALIGN_8BYTES)
				66	// Unless we force to use 8 bytes alignment we use an alignment of
				67	// at least 16 bytes to statisfy requirements for some SSE types.
				68	// Keep in mind when using the 16 bytes alignment you can have a space
				69	// waste due alignment of 25%. (eg malloc of 24 bytes will get 32 bytes)
				70	static const size_t kMinAlign = 8;
				71	// Number of classes created until reach page size 128.
				72	static const size_t kBaseClasses = 16;
				73	#else
				74	static const size_t kMinAlign = 16;
				75	static const size_t kBaseClasses = 9;
				76	#endif
				77
				78	// Using large pages speeds up the execution at a cost of larger memory use.
				79	// Deallocation may speed up by a factor as the page map gets 8x smaller, so
				80	// lookups in the page map result in fewer L2 cache misses, which translates to
				81	// speedup for application/platform combinations with high L2 cache pressure.
				82	// As the number of size classes increases with large pages, we increase
				83	// the thread cache allowance to avoid passing more free ranges to and from
				84	// central lists. Also, larger pages are less likely to get freed.
				85	// These two factors cause a bounded increase in memory use.
				86	#if defined(TCMALLOC_32K_PAGES)
				87	static const size_t kPageShift = 15;
				88	static const size_t kNumClasses = kBaseClasses + 69;
				89	#elif defined(TCMALLOC_64K_PAGES)
				90	static const size_t kPageShift = 16;
				91	static const size_t kNumClasses = kBaseClasses + 73;
				92	#else
				93	static const size_t kPageShift = 13;
				94	static const size_t kNumClasses = kBaseClasses + 79;
				95	#endif
				96
				97	static const size_t kMaxThreadCacheSize = 4 << 20;
				98
				99	static const size_t kPageSize = 1 << kPageShift;
				100	static const size_t kMaxSize = 256 * 1024;
				101	static const size_t kAlignment = 8;
				102	static const size_t kLargeSizeClass = 0;
				103	// For all span-lengths < kMaxPages we keep an exact-size list.
				104	static const size_t kMaxPages = 1 << (20 - kPageShift);
				105
				106	// Default bound on the total amount of thread caches.
				107	#ifdef TCMALLOC_SMALL_BUT_SLOW
				108	// Make the overall thread cache no bigger than that of a single thread
				109	// for the small memory footprint case.
				110	static const size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize;
				111	#else
				112	static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize;
				113	#endif
				114
				115	// Lower bound on the per-thread cache sizes
				116	static const size_t kMinThreadCacheSize = kMaxSize * 2;
				117
				118	// The number of bytes one ThreadCache will steal from another when
				119	// the first ThreadCache is forced to Scavenge(), delaying the
				120	// next call to Scavenge for this thread.
				121	static const size_t kStealAmount = 1 << 16;
				122
				123	// The number of times that a deallocation can cause a freelist to
				124	// go over its max_length() before shrinking max_length().
				125	static const int kMaxOverages = 3;
				126
				127	// Maximum length we allow a per-thread free-list to have before we
				128	// move objects from it into the corresponding central free-list. We
				129	// want this big to avoid locking the central free-list too often. It
				130	// should not hurt to make this list somewhat big because the
				131	// scavenging code will shrink it down when its contents are not in use.
				132	static const int kMaxDynamicFreeListLength = 8192;
				133
				134	static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;
				135
				136	#if defined __x86_64__
				137	// All current and planned x86_64 processors only look at the lower 48 bits
				138	// in virtual to physical address translation. The top 16 are thus unused.
				139	// TODO(rus): Under what operating systems can we increase it safely to 17?
				140	// This lets us use smaller page maps. On first allocation, a 36-bit page map
				141	// uses only 96 KB instead of the 4.5 MB used by a 52-bit page map.
				142	static const int kAddressBits = (sizeof(void) < 8 ? (8 sizeof(void*)) : 48);
				143	#else
				144	static const int kAddressBits = 8 * sizeof(void*);
				145	#endif
				146
				147	namespace tcmalloc {
				148
				149	// Convert byte size into pages. This won't overflow, but may return
				150	// an unreasonably large value if bytes is huge enough.
				151	inline Length pages(size_t bytes) {
				152	return (bytes >> kPageShift) +
				153	((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
				154	}
				155
				156	// For larger allocation sizes, we use larger memory alignments to
				157	// reduce the number of size classes.
				158	int AlignmentForSize(size_t size);
				159
				160	// Size-class information + mapping
				161	class SizeMap {
				162	private:
				163	// Number of objects to move between a per-thread list and a central
				164	// list in one shot. We want this to be not too small so we can
				165	// amortize the lock overhead for accessing the central list. Making
				166	// it too big may temporarily cause unnecessary memory wastage in the
				167	// per-thread free list until the scavenger cleans up the list.
				168	int num_objects_to_move_[kNumClasses];
				169
				170	//-------------------------------------------------------------------
				171	// Mapping from size to size_class and vice versa
				172	//-------------------------------------------------------------------
				173
				174	// Sizes <= 1024 have an alignment >= 8. So for such sizes we have an
				175	// array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128.
				176	// So for these larger sizes we have an array indexed by ceil(size/128).
				177	//
				178	// We flatten both logical arrays into one physical array and use
				179	// arithmetic to compute an appropriate index. The constants used by
				180	// ClassIndex() were selected to make the flattening work.
				181	//
				182	// Examples:
				183	// Size Expression Index
				184	// -------------------------------------------------------
				185	// 0 (0 + 7) / 8 0
				186	// 1 (1 + 7) / 8 1
				187	// ...
				188	// 1024 (1024 + 7) / 8 128
				189	// 1025 (1025 + 127 + (120<<7)) / 128 129
				190	// ...
				191	// 32768 (32768 + 127 + (120<<7)) / 128 376
				192	static const int kMaxSmallSize = 1024;
				193	static const size_t kClassArraySize =
				194	((kMaxSize + 127 + (120 << 7)) >> 7) + 1;
				195	unsigned char class_array_[kClassArraySize];
				196
				197	// Compute index of the class_array[] entry for a given size
				198	static inline size_t ClassIndex(int s) {
				199	// Use unsigned arithmetic to avoid unnecessary sign extensions.
				200	ASSERT(0 <= s);
				201	ASSERT(s <= kMaxSize);
				202	if (LIKELY(s <= kMaxSmallSize)) {
				203	return (static_cast<uint32_t>(s) + 7) >> 3;
				204	} else {
				205	return (static_cast<uint32_t>(s) + 127 + (120 << 7)) >> 7;
				206	}
				207	}
				208
				209	int NumMoveSize(size_t size);
				210
				211	// Mapping from size class to max size storable in that class
				212	size_t class_to_size_[kNumClasses];
				213
				214	// Mapping from size class to number of pages to allocate at a time
				215	size_t class_to_pages_[kNumClasses];
				216
				217	public:
				218	// Constructor should do nothing since we rely on explicit Init()
				219	// call, which may or may not be called before the constructor runs.
				220	SizeMap() { }
				221
				222	// Initialize the mapping arrays
				223	void Init();
				224
				225	inline int SizeClass(int size) {
				226	return class_array_[ClassIndex(size)];
				227	}
				228
				229	// Get the byte-size for a specified class
				230	inline size_t ByteSizeForClass(size_t cl) {
				231	return class_to_size_[cl];
				232	}
				233
				234	// Mapping from size class to max size storable in that class
				235	inline size_t class_to_size(size_t cl) {
				236	return class_to_size_[cl];
				237	}
				238
				239	// Mapping from size class to number of pages to allocate at a time
				240	inline size_t class_to_pages(size_t cl) {
				241	return class_to_pages_[cl];
				242	}
				243
				244	// Number of objects to move between a per-thread list and a central
				245	// list in one shot. We want this to be not too small so we can
				246	// amortize the lock overhead for accessing the central list. Making
				247	// it too big may temporarily cause unnecessary memory wastage in the
				248	// per-thread free list until the scavenger cleans up the list.
				249	inline int num_objects_to_move(size_t cl) {
				250	return num_objects_to_move_[cl];
				251	}
				252	};
				253
				254	// Allocates "bytes" worth of memory and returns it. Increments
				255	// metadata_system_bytes appropriately. May return NULL if allocation
				256	// fails. Requires pageheap_lock is held.
				257	void* MetaDataAlloc(size_t bytes);
				258
				259	// Returns the total number of bytes allocated from the system.
				260	// Requires pageheap_lock is held.
				261	uint64_t metadata_system_bytes();
				262
				263	// size/depth are made the same size as a pointer so that some generic
				264	// code below can conveniently cast them back and forth to void*.
				265	static const int kMaxStackDepth = 31;
				266	struct StackTrace {
				267	uintptr_t size; // Size of object
				268	uintptr_t depth; // Number of PC values stored in array below
				269	void* stack[kMaxStackDepth];
				270	};
				271
				272	} // namespace tcmalloc
				273
				274	#endif // TCMALLOC_COMMON_H_