// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
// Copyright (c) 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// ---
// Author: Sanjay Ghemawat <opensource@google.com>
//
// Common definitions for tcmalloc code.

36#ifndef TCMALLOC_COMMON_H_
37#define TCMALLOC_COMMON_H_
38
39#include "config.h"
40#include <stddef.h> // for size_t
41#ifdef HAVE_STDINT_H
42#include <stdint.h> // for uintptr_t, uint64_t
43#endif
44#include "internal_logging.h" // for ASSERT, etc
45#include "base/basictypes.h" // for LIKELY, etc
46
47#ifdef HAVE_BUILTIN_EXPECT
48#define LIKELY(x) __builtin_expect(!!(x), 1)
49#define UNLIKELY(x) __builtin_expect(!!(x), 0)
50#else
51#define LIKELY(x) (x)
52#define UNLIKELY(x) (x)
53#endif
54
// Type that can hold a page number
typedef uintptr_t PageID;

// Type that can hold the length of a run of pages
typedef uintptr_t Length;

//-------------------------------------------------------------------
// Configuration
//-------------------------------------------------------------------

#if defined(TCMALLOC_ALIGN_8BYTES)
// Unless we force to use 8 bytes alignment we use an alignment of
// at least 16 bytes to satisfy requirements for some SSE types.
// Keep in mind when using the 16 bytes alignment you can have a space
// waste due alignment of 25%. (eg malloc of 24 bytes will get 32 bytes)
static const size_t kMinAlign   = 8;
// Number of classes created until reach page size 128.
static const size_t kBaseClasses = 16;
#else
static const size_t kMinAlign   = 16;
static const size_t kBaseClasses = 9;
#endif

// Using large pages speeds up the execution at a cost of larger memory use.
// Deallocation may speed up by a factor as the page map gets 8x smaller, so
// lookups in the page map result in fewer L2 cache misses, which translates to
// speedup for application/platform combinations with high L2 cache pressure.
// As the number of size classes increases with large pages, we increase
// the thread cache allowance to avoid passing more free ranges to and from
// central lists.  Also, larger pages are less likely to get freed.
// These two factors cause a bounded increase in memory use.
#if defined(TCMALLOC_32K_PAGES)
static const size_t kPageShift  = 15;
static const size_t kNumClasses = kBaseClasses + 69;
#elif defined(TCMALLOC_64K_PAGES)
static const size_t kPageShift  = 16;
static const size_t kNumClasses = kBaseClasses + 73;
#else
// Default: 8 KiB pages.
static const size_t kPageShift  = 13;
static const size_t kNumClasses = kBaseClasses + 79;
#endif

// Upper bound on the size of a single thread cache.
static const size_t kMaxThreadCacheSize = 4 << 20;

static const size_t kPageSize   = 1 << kPageShift;
// Largest allocation handled by the size-class machinery; bigger
// requests go straight to the page heap.
static const size_t kMaxSize    = 256 * 1024;
static const size_t kAlignment  = 8;
// Size class reserved for "large" (page-heap) allocations.
static const size_t kLargeSizeClass = 0;
// For all span-lengths < kMaxPages we keep an exact-size list.
static const size_t kMaxPages = 1 << (20 - kPageShift);

// Default bound on the total amount of thread caches.
#ifdef TCMALLOC_SMALL_BUT_SLOW
// Make the overall thread cache no bigger than that of a single thread
// for the small memory footprint case.
static const size_t kDefaultOverallThreadCacheSize = kMaxThreadCacheSize;
#else
static const size_t kDefaultOverallThreadCacheSize = 8u * kMaxThreadCacheSize;
#endif

// Lower bound on the per-thread cache sizes
static const size_t kMinThreadCacheSize = kMaxSize * 2;

// The number of bytes one ThreadCache will steal from another when
// the first ThreadCache is forced to Scavenge(), delaying the
// next call to Scavenge for this thread.
static const size_t kStealAmount = 1 << 16;

// The number of times that a deallocation can cause a freelist to
// go over its max_length() before shrinking max_length().
static const int kMaxOverages = 3;

// Maximum length we allow a per-thread free-list to have before we
// move objects from it into the corresponding central free-list.  We
// want this big to avoid locking the central free-list too often.  It
// should not hurt to make this list somewhat big because the
// scavenging code will shrink it down when its contents are not in use.
static const int kMaxDynamicFreeListLength = 8192;

// Largest page count representable without overflow in a Length.
static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;

#if defined __x86_64__
// All current and planned x86_64 processors only look at the lower 48 bits
// in virtual to physical address translation.  The top 16 are thus unused.
// TODO(rus): Under what operating systems can we increase it safely to 17?
// This lets us use smaller page maps.  On first allocation, a 36-bit page map
// uses only 96 KB instead of the 4.5 MB used by a 52-bit page map.
static const int kAddressBits = (sizeof(void*) < 8 ? (8 * sizeof(void*)) : 48);
#else
static const int kAddressBits = 8 * sizeof(void*);
#endif

147namespace tcmalloc {
148
149// Convert byte size into pages. This won't overflow, but may return
150// an unreasonably large value if bytes is huge enough.
151inline Length pages(size_t bytes) {
152 return (bytes >> kPageShift) +
153 ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
154}
155
156// For larger allocation sizes, we use larger memory alignments to
157// reduce the number of size classes.
158int AlignmentForSize(size_t size);
159
160// Size-class information + mapping
161class SizeMap {
162 private:
163 // Number of objects to move between a per-thread list and a central
164 // list in one shot. We want this to be not too small so we can
165 // amortize the lock overhead for accessing the central list. Making
166 // it too big may temporarily cause unnecessary memory wastage in the
167 // per-thread free list until the scavenger cleans up the list.
168 int num_objects_to_move_[kNumClasses];
169
170 //-------------------------------------------------------------------
171 // Mapping from size to size_class and vice versa
172 //-------------------------------------------------------------------
173
174 // Sizes <= 1024 have an alignment >= 8. So for such sizes we have an
175 // array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128.
176 // So for these larger sizes we have an array indexed by ceil(size/128).
177 //
178 // We flatten both logical arrays into one physical array and use
179 // arithmetic to compute an appropriate index. The constants used by
180 // ClassIndex() were selected to make the flattening work.
181 //
182 // Examples:
183 // Size Expression Index
184 // -------------------------------------------------------
185 // 0 (0 + 7) / 8 0
186 // 1 (1 + 7) / 8 1
187 // ...
188 // 1024 (1024 + 7) / 8 128
189 // 1025 (1025 + 127 + (120<<7)) / 128 129
190 // ...
191 // 32768 (32768 + 127 + (120<<7)) / 128 376
192 static const int kMaxSmallSize = 1024;
193 static const size_t kClassArraySize =
194 ((kMaxSize + 127 + (120 << 7)) >> 7) + 1;
195 unsigned char class_array_[kClassArraySize];
196
197 // Compute index of the class_array[] entry for a given size
198 static inline size_t ClassIndex(int s) {
199 // Use unsigned arithmetic to avoid unnecessary sign extensions.
200 ASSERT(0 <= s);
201 ASSERT(s <= kMaxSize);
202 if (LIKELY(s <= kMaxSmallSize)) {
203 return (static_cast<uint32_t>(s) + 7) >> 3;
204 } else {
205 return (static_cast<uint32_t>(s) + 127 + (120 << 7)) >> 7;
206 }
207 }
208
209 int NumMoveSize(size_t size);
210
211 // Mapping from size class to max size storable in that class
212 size_t class_to_size_[kNumClasses];
213
214 // Mapping from size class to number of pages to allocate at a time
215 size_t class_to_pages_[kNumClasses];
216
217 public:
218 // Constructor should do nothing since we rely on explicit Init()
219 // call, which may or may not be called before the constructor runs.
220 SizeMap() { }
221
222 // Initialize the mapping arrays
223 void Init();
224
225 inline int SizeClass(int size) {
226 return class_array_[ClassIndex(size)];
227 }
228
229 // Get the byte-size for a specified class
230 inline size_t ByteSizeForClass(size_t cl) {
231 return class_to_size_[cl];
232 }
233
234 // Mapping from size class to max size storable in that class
235 inline size_t class_to_size(size_t cl) {
236 return class_to_size_[cl];
237 }
238
239 // Mapping from size class to number of pages to allocate at a time
240 inline size_t class_to_pages(size_t cl) {
241 return class_to_pages_[cl];
242 }
243
244 // Number of objects to move between a per-thread list and a central
245 // list in one shot. We want this to be not too small so we can
246 // amortize the lock overhead for accessing the central list. Making
247 // it too big may temporarily cause unnecessary memory wastage in the
248 // per-thread free list until the scavenger cleans up the list.
249 inline int num_objects_to_move(size_t cl) {
250 return num_objects_to_move_[cl];
251 }
252};
253
254// Allocates "bytes" worth of memory and returns it. Increments
255// metadata_system_bytes appropriately. May return NULL if allocation
256// fails. Requires pageheap_lock is held.
257void* MetaDataAlloc(size_t bytes);
258
259// Returns the total number of bytes allocated from the system.
260// Requires pageheap_lock is held.
261uint64_t metadata_system_bytes();
262
263// size/depth are made the same size as a pointer so that some generic
264// code below can conveniently cast them back and forth to void*.
265static const int kMaxStackDepth = 31;
266struct StackTrace {
267 uintptr_t size; // Size of object
268 uintptr_t depth; // Number of PC values stored in array below
269 void* stack[kMaxStackDepth];
270};
271
272} // namespace tcmalloc
273
274#endif // TCMALLOC_COMMON_H_