// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat <opensource@google.com>
//
// Common definitions for tcmalloc code.

#ifndef TCMALLOC_COMMON_H_
#define TCMALLOC_COMMON_H_

#include "config.h"
#include <stddef.h>            // for size_t
#ifdef HAVE_STDINT_H
#include <stdint.h>            // for uintptr_t, uint64_t
#endif
#include "internal_logging.h"  // for ASSERT, etc
#include "base/basictypes.h"   // for LIKELY, etc

// Type that can hold a page number
typedef uintptr_t PageID;

// Type that can hold the length of a run of pages
typedef uintptr_t Length;

//-------------------------------------------------------------------
// Configuration
//-------------------------------------------------------------------
#if defined(TCMALLOC_ALIGN_8BYTES)
// Unless we are forced to use 8-byte alignment, we use an alignment of
// at least 16 bytes to satisfy requirements for some SSE types.
// Keep in mind that with 16-byte alignment, space waste due to
// alignment can reach 25% (e.g. a malloc of 24 bytes will get 32 bytes).
static const size_t kMinAlign = 8;
#else
static const size_t kMinAlign = 16;
#endif
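
// Worked example of the waste figure above (illustrative, not part of
// the interface): with kMinAlign == 16, a 24-byte request is rounded up
// to 32 bytes, wasting 8 of 32 bytes, i.e. 25%; with kMinAlign == 8 the
// same request fits exactly and wastes nothing.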

// Using large pages speeds up execution at the cost of larger memory use.
// Deallocation may speed up substantially as the page map becomes 8x smaller,
// so lookups in the page map result in fewer L2 cache misses, which translates
// into a speedup for application/platform combinations with high L2 cache
// pressure.  As the number of size classes increases with large pages, we
// increase the thread cache allowance to avoid passing more free ranges to
// and from central lists.  Also, larger pages are less likely to get freed.
// These two factors cause a bounded increase in memory use.
#if defined(TCMALLOC_PAGE_SIZE_SHIFT)
static const size_t kPageShift = TCMALLOC_PAGE_SIZE_SHIFT;
#else
static const size_t kPageShift = 13;
#endif

static const size_t kClassSizesMax = 128;

static const size_t kMaxThreadCacheSize = 4 << 20;

static const size_t kPageSize   = 1 << kPageShift;
static const size_t kMaxSize    = 256 * 1024;
static const size_t kAlignment  = 8;
// For all span-lengths <= kMaxPages we keep an exact-size list in PageHeap.
static const size_t kMaxPages = 1 << (20 - kPageShift);
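
// Derived values with the defaults above (illustrative): kPageShift == 13
// gives kPageSize == 8192 bytes (8 KiB), and kMaxPages == 1 << (20 - 13)
// == 128, so spans of up to 128 pages (1 MiB) get exact-size free lists.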

// Default bound on the total amount of thread caches.
#ifdef TCMALLOC_SMALL_BUT_SLOW
// Make the overall thread cache no bigger than that of a single thread
// for the small memory footprint case.
static const size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize;
#else
static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize;
#endif

// Lower bound on the per-thread cache sizes
static const size_t kMinThreadCacheSize = kMaxSize * 2;

// The number of bytes one ThreadCache will steal from another when
// the first ThreadCache is forced to Scavenge(), delaying the
// next call to Scavenge for this thread.
static const size_t kStealAmount = 1 << 16;

// The number of times that a deallocation can cause a freelist to
// go over its max_length() before shrinking max_length().
static const int kMaxOverages = 3;

// Maximum length we allow a per-thread free-list to have before we
// move objects from it into the corresponding central free-list.  We
// want this big to avoid locking the central free-list too often.  It
// should not hurt to make this list somewhat big because the
// scavenging code will shrink it down when its contents are not in use.
static const int kMaxDynamicFreeListLength = 8192;

static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;

#if __aarch64__ || __x86_64__ || _M_AMD64 || _M_ARM64
// All current x86_64 processors only look at the lower 48 bits in
// virtual-to-physical address translation.  The top 16 bits are all the
// same as bit 47, and addresses with bit 47 set are in practice reserved
// for kernel space.  So malloc effectively sees 47 usable bits.  This
// lets us use faster two-level page maps on this architecture.
//
// There is a very similar story on 64-bit ARM, except that it has the
// full 48 bits for user space.  Because of that, and because in principle
// OSes can start giving some highest-bit-set addresses to user space,
// we don't bother to limit x86 to 47 bits.
//
// As of now there are published plans to add more bits to the x86-64
// virtual address space, but since 48 bits has been the norm for a long
// time and lots of software relies on it, it will be opt-in from the
// OS's perspective.  So we can keep assuming "48 bits" at least for now.
static const int kAddressBits = (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48);
#else
// MIPS and PPC have more general hardware, so we have to support the
// full 64 bits of address space.
static const int kAddressBits = 8 * sizeof(void*);
#endif
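
// Illustrative arithmetic (a sketch, not part of the interface): with the
// defaults above, a page number needs kAddressBits - kPageShift == 48 - 13
// == 35 bits, which a shallow two-level radix map can cover; a full 64-bit
// address space would need 51 bits and a deeper map.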

namespace tcmalloc {

// Convert byte size into pages.  This won't overflow, but may return
// an unreasonably large value if bytes is huge enough.
inline Length pages(size_t bytes) {
  return (bytes >> kPageShift) +
      ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
}
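
// Worked example (assuming the default 8 KiB pages): pages(0) == 0,
// pages(1) == 1, pages(8192) == 1, and pages(8193) == 2; the shift
// counts whole pages and the masked remainder adds one more page.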

// For larger allocation sizes, we use larger memory alignments to
// reduce the number of size classes.
int AlignmentForSize(size_t size);

// Size-class information + mapping
class SizeMap {
 private:
  //-------------------------------------------------------------------
  // Mapping from size to size_class and vice versa
  //-------------------------------------------------------------------

  // Sizes <= 1024 have an alignment >= 8.  So for such sizes we have an
  // array indexed by ceil(size/8).  Sizes > 1024 have an alignment >= 128.
  // So for these larger sizes we have an array indexed by ceil(size/128).
  //
  // We flatten both logical arrays into one physical array and use
  // arithmetic to compute an appropriate index.  The constants used by
  // ClassIndex() were selected to make the flattening work.
  //
  // Examples:
  //   Size       Expression                      Index
  //   -------------------------------------------------------
  //   0          (0 + 7) / 8                     0
  //   1          (1 + 7) / 8                     1
  //   ...
  //   1024       (1024 + 7) / 8                  128
  //   1025       (1025 + 127 + (120<<7)) / 128   129
  //   ...
  //   32768      (32768 + 127 + (120<<7)) / 128  376
  static const int kMaxSmallSize = 1024;
  static const size_t kClassArraySize =
      ((kMaxSize + 127 + (120 << 7)) >> 7) + 1;
  unsigned char class_array_[kClassArraySize];
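
  // Illustrative arithmetic (assuming the default kMaxSize of 256 KiB):
  // kClassArraySize == ((262144 + 127 + (120 << 7)) >> 7) + 1 == 2169,
  // so at one byte per entry the lookup table is about 2 KiB in total.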

  // Compute the class_array_ index for a small size (s <= kMaxSmallSize).
  static inline size_t SmallSizeClass(size_t s) {
    return (static_cast<uint32_t>(s) + 7) >> 3;
  }

  // Compute the class_array_ index for a larger size
  // (kMaxSmallSize < s <= kMaxSize).
  static inline size_t LargeSizeClass(size_t s) {
    return (static_cast<uint32_t>(s) + 127 + (120 << 7)) >> 7;
  }

  // If size is no more than kMaxSize, compute index of the
  // class_array_[] entry for it, putting the class index in output
  // parameter idx and returning true.  Otherwise return false.
  static inline bool ATTRIBUTE_ALWAYS_INLINE ClassIndexMaybe(size_t s,
                                                             uint32* idx) {
    if (PREDICT_TRUE(s <= kMaxSmallSize)) {
      *idx = (static_cast<uint32>(s) + 7) >> 3;
      return true;
    } else if (s <= kMaxSize) {
      *idx = (static_cast<uint32>(s) + 127 + (120 << 7)) >> 7;
      return true;
    }
    return false;
  }

  // Compute index of the class_array_[] entry for a given size
  static inline size_t ClassIndex(size_t s) {
    // Use unsigned arithmetic to avoid unnecessary sign extensions.
    ASSERT(0 <= s);
    ASSERT(s <= kMaxSize);
    if (PREDICT_TRUE(s <= kMaxSmallSize)) {
      return SmallSizeClass(s);
    } else {
      return LargeSizeClass(s);
    }
  }
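
  // Worked examples (illustrative): ClassIndex(40) == (40 + 7) >> 3 == 5,
  // and ClassIndex(2000) == (2000 + 127 + (120 << 7)) >> 7 == 136.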

  // Number of objects to move between a per-thread list and a central
  // list in one shot.  We want this to be not too small so we can
  // amortize the lock overhead for accessing the central list.  Making
  // it too big may temporarily cause unnecessary memory wastage in the
  // per-thread free list until the scavenger cleans up the list.
  int num_objects_to_move_[kClassSizesMax];

  // Compute a good number of objects to move at a time for the given size.
  int NumMoveSize(size_t size);

  // Mapping from size class to max size storable in that class
  int32 class_to_size_[kClassSizesMax];

  // Mapping from size class to number of pages to allocate at a time
  size_t class_to_pages_[kClassSizesMax];

 public:
  // Number of size classes currently in use.
  size_t num_size_classes;

  // Constructor should do nothing since we rely on explicit Init()
  // call, which may or may not be called before the constructor runs.
  SizeMap() { }

  // Initialize the mapping arrays
  void Init();

  inline int SizeClass(size_t size) {
    return class_array_[ClassIndex(size)];
  }

  // Check if size is small enough to be representable by a size
  // class, and if it is, put the matching size class into *cl.
  // Returns true iff a matching size class was found.
  inline bool ATTRIBUTE_ALWAYS_INLINE GetSizeClass(size_t size, uint32* cl) {
    uint32 idx;
    if (!ClassIndexMaybe(size, &idx)) {
      return false;
    }
    *cl = class_array_[idx];
    return true;
  }

  // Get the byte-size for a specified class
  inline int32 ATTRIBUTE_ALWAYS_INLINE ByteSizeForClass(uint32 cl) {
    return class_to_size_[cl];
  }

  // Mapping from size class to max size storable in that class
  inline int32 class_to_size(uint32 cl) {
    return class_to_size_[cl];
  }

  // Mapping from size class to number of pages to allocate at a time
  inline size_t class_to_pages(uint32 cl) {
    return class_to_pages_[cl];
  }

  // Number of objects to move between a per-thread list and a central
  // list in one shot.  We want this to be not too small so we can
  // amortize the lock overhead for accessing the central list.  Making
  // it too big may temporarily cause unnecessary memory wastage in the
  // per-thread free list until the scavenger cleans up the list.
  inline int num_objects_to_move(uint32 cl) {
    return num_objects_to_move_[cl];
  }
};
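
// Usage sketch (illustrative only; assumes a SizeMap instance "sizemap"
// that has had Init() called, e.g. the allocator's global one):
//
//   uint32 cl;
//   if (sizemap->GetSizeClass(40, &cl)) {
//     int32 rounded = sizemap->ByteSizeForClass(cl);    // bytes actually used
//     int batch = sizemap->num_objects_to_move(cl);     // central-list batch
//   } else {
//     // size > kMaxSize: served by the page-level allocator instead.
//   }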

// Allocates "bytes" worth of memory and returns it.  Increments
// metadata_system_bytes appropriately.  May return NULL if allocation
// fails.  Requires that pageheap_lock is held.
void* MetaDataAlloc(size_t bytes);

// Returns the total number of bytes allocated from the system.
// Requires that pageheap_lock is held.
uint64_t metadata_system_bytes();

// size/depth are made the same size as a pointer so that some generic
// code below can conveniently cast them back and forth to void*.
static const int kMaxStackDepth = 31;
struct StackTrace {
  uintptr_t size;   // Size of object
  uintptr_t depth;  // Number of PC values stored in array below
  void*     stack[kMaxStackDepth];
};
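
// Illustrative footprint (assuming 64-bit pointers): sizeof(StackTrace)
// == 2 * 8 + 31 * 8 == 264 bytes per recorded trace.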

}  // namespace tcmalloc

#endif  // TCMALLOC_COMMON_H_