1// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
2// Copyright (c) 2005, Google Inc.
3// All rights reserved.
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// ---
32// Author: Sanjay Ghemawat <opensource@google.com>
33//
34// A malloc that uses a per-thread cache to satisfy small malloc requests.
35// (The time for malloc/free of a small object drops from 300 ns to 50 ns.)
36//
37// See doc/tcmalloc.html for a high-level
38// description of how this malloc works.
39//
40// SYNCHRONIZATION
41// 1. The thread-specific lists are accessed without acquiring any locks.
42// This is safe because each such list is only accessed by one thread.
43// 2. We have a lock per central free-list, and hold it while manipulating
44// the central free list for a particular size.
45// 3. The central page allocator is protected by "pageheap_lock".
46// 4. The pagemap (which maps from page-number to descriptor),
47// can be read without holding any locks, and written while holding
48// the "pageheap_lock".
49// 5. To improve performance, a subset of the information one can get
50// from the pagemap is cached in a data structure, pagemap_cache_,
51// that atomically reads and writes its entries. This cache can be
52// read and written without locking.
53//
54// This multi-threaded access to the pagemap is safe for fairly
55// subtle reasons. We basically assume that when an object X is
56// allocated by thread A and deallocated by thread B, there must
57// have been appropriate synchronization in the handoff of object
58// X from thread A to thread B. The same logic applies to pagemap_cache_.
59//
60// THE PAGEID-TO-SIZECLASS CACHE
61// Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache
62// returns 0 for a particular PageID then that means "no information," not that
63// the sizeclass is 0. The cache may have stale information for pages that do
64// not hold the beginning of any free()'able object. Staleness is eliminated
65// in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and
66// do_memalign() for all other relevant pages.
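//
// As a rough illustration of how callers consume this cache (an editorial
// sketch; see do_free_helper() and GetSizeWithCallback() below for the real
// code):
//
//   size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
//   if (cl == 0) {                                       // "no information"
//     const Span* span = Static::pageheap()->GetDescriptor(p);  // slow path
//     ...
//   }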
67//
68// PAGEMAP
69// -------
70// Page map contains a mapping from page id to Span.
71//
72// If Span s occupies pages [p..q],
73// pagemap[p] == s
74// pagemap[q] == s
75// pagemap[p+1..q-1] are undefined
76// pagemap[p-1] and pagemap[q+1] are defined:
77// NULL if the corresponding page is not yet in the address space.
78// Otherwise it points to a Span. This span may be free
79// or allocated. If free, it is in one of pageheap's freelists.
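//
// For example (an illustrative sketch of the invariant above): if a
// three-page Span s covers pages [17..19], then
//   pagemap[17] == s, pagemap[19] == s, pagemap[18] is undefined,
// and pagemap[16] / pagemap[20] are either NULL or point to the
// neighboring spans.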
80//
81// TODO: Bias reclamation to larger addresses
82// TODO: implement mallinfo/mallopt
83// TODO: Better testing
84//
85// 9/28/2003 (new page-level allocator replaces ptmalloc2):
86// * malloc/free of small objects goes from ~300 ns to ~50 ns.
87// * allocation of a reasonably complicated struct
88//   goes from about 1100 ns to about 300 ns.
89
90#include "config.h"
91#include <gperftools/tcmalloc.h>
92
93#include <errno.h> // for ENOMEM, EINVAL, errno
94#ifdef HAVE_SYS_CDEFS_H
95#include <sys/cdefs.h> // for __THROW
96#endif
97#if defined HAVE_STDINT_H
98#include <stdint.h>
99#elif defined HAVE_INTTYPES_H
100#include <inttypes.h>
101#else
102#include <sys/types.h>
103#endif
104#include <stddef.h> // for size_t, NULL
105#include <stdlib.h> // for getenv
106#include <string.h> // for strcmp, memset, strlen, etc
107#ifdef HAVE_UNISTD_H
108#include <unistd.h> // for getpagesize, write, etc
109#endif
110#include <algorithm> // for max, min
111#include <limits> // for numeric_limits
112#include <new> // for nothrow_t (ptr only), etc
113#include <vector> // for vector
114
115#include <gperftools/malloc_extension.h>
116#include <gperftools/malloc_hook.h> // for MallocHook
117#include "base/basictypes.h" // for int64
118#include "base/commandlineflags.h" // for RegisterFlagValidator, etc
119#include "base/dynamic_annotations.h" // for RunningOnValgrind
120#include "base/spinlock.h" // for SpinLockHolder
121#include "central_freelist.h" // for CentralFreeListPadded
122#include "common.h" // for StackTrace, kPageShift, etc
123#include "internal_logging.h" // for ASSERT, TCMalloc_Printer, etc
124#include "linked_list.h" // for SLL_SetNext
125#include "malloc_hook-inl.h" // for MallocHook::InvokeNewHook, etc
126#include "page_heap.h" // for PageHeap, PageHeap::Stats
127#include "page_heap_allocator.h" // for PageHeapAllocator
128#include "span.h" // for Span, DLL_Prepend, etc
129#include "stack_trace_table.h" // for StackTraceTable
130#include "static_vars.h" // for Static
131#include "system-alloc.h" // for DumpSystemAllocatorStats, etc
132#include "tcmalloc_guard.h" // for TCMallocGuard
133#include "thread_cache.h" // for ThreadCache
134
135#ifdef __clang__
136// clang's apparent focus on code size somehow causes it to ignore
137// normal inline directives, even for the few functions where inlining
138// is key to performance. To bring the performance of clang-generated
139// code closer to normal, we force inlining via an
140// attribute.
141#define ALWAYS_INLINE inline __attribute__((always_inline))
142#else
143#define ALWAYS_INLINE inline
144#endif
145
146#if (defined(_WIN32) && !defined(__CYGWIN__) && !defined(__CYGWIN32__)) && !defined(WIN32_OVERRIDE_ALLOCATORS)
147# define WIN32_DO_PATCHING 1
148#endif
149
150// Some windows file somewhere (at least on cygwin) #define's small (!)
151#undef small
152
153using STL_NAMESPACE::max;
154using STL_NAMESPACE::numeric_limits;
155using STL_NAMESPACE::vector;
156
157#include "libc_override.h"
158
159// __THROW is defined in glibc (via <sys/cdefs.h>). It means,
160// counter-intuitively, "This function will never throw an exception."
161// It's an optional optimization tool, but we may need to use it to
162// match glibc prototypes.
163#ifndef __THROW // I guess we're not on a glibc system
164# define __THROW // __THROW is just an optimization, so ok to make it ""
165#endif
166
167using tcmalloc::AlignmentForSize;
168using tcmalloc::kLog;
169using tcmalloc::kCrash;
170using tcmalloc::kCrashWithStats;
171using tcmalloc::Log;
172using tcmalloc::PageHeap;
173using tcmalloc::PageHeapAllocator;
174using tcmalloc::SizeMap;
175using tcmalloc::Span;
176using tcmalloc::StackTrace;
177using tcmalloc::Static;
178using tcmalloc::ThreadCache;
179
180DECLARE_int64(tcmalloc_sample_parameter);
181DECLARE_double(tcmalloc_release_rate);
182
183// For windows, the printf we use to report large allocs is
184// potentially dangerous: it could cause a malloc that would cause an
185// infinite loop. So by default we set the threshold to a huge number
186// on windows, so this bad situation will never trigger. You can
187// always set TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD manually if you
188// want this functionality.
189#ifdef _WIN32
190const int64 kDefaultLargeAllocReportThreshold = static_cast<int64>(1) << 62;
191#else
192const int64 kDefaultLargeAllocReportThreshold = static_cast<int64>(1) << 30;
193#endif
194DEFINE_int64(tcmalloc_large_alloc_report_threshold,
195 EnvToInt64("TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD",
196 kDefaultLargeAllocReportThreshold),
197 "Allocations larger than this value cause a stack "
198 "trace to be dumped to stderr. The threshold for "
199 "dumping stack traces is increased by a factor of 1.125 "
200 "every time we print a message so that the threshold "
201 "automatically goes up by a factor of ~1000 every 60 "
202 "messages. This bounds the amount of extra logging "
203 "generated by this flag. Default value of this flag "
204 "is very large and therefore you should see no extra "
205 "logging unless the flag is overridden. Set to 0 to "
206 "disable reporting entirely.");
207
208
209// We already declared these functions in tcmalloc.h, but we have to
210// declare them again to give them an ATTRIBUTE_SECTION: we want to
211// put all callers of MallocHook::Invoke* in this module into
212// ATTRIBUTE_SECTION(google_malloc) section, so that
213// MallocHook::GetCallerStackTrace can function accurately.
214#ifndef _WIN32 // windows doesn't have attribute_section, so don't bother
215extern "C" {
216 void* tc_malloc(size_t size) __THROW
217 ATTRIBUTE_SECTION(google_malloc);
218 void tc_free(void* ptr) __THROW
219 ATTRIBUTE_SECTION(google_malloc);
220 void* tc_realloc(void* ptr, size_t size) __THROW
221 ATTRIBUTE_SECTION(google_malloc);
222 void* tc_calloc(size_t nmemb, size_t size) __THROW
223 ATTRIBUTE_SECTION(google_malloc);
224 void tc_cfree(void* ptr) __THROW
225 ATTRIBUTE_SECTION(google_malloc);
226
227 void* tc_memalign(size_t __alignment, size_t __size) __THROW
228 ATTRIBUTE_SECTION(google_malloc);
229 int tc_posix_memalign(void** ptr, size_t align, size_t size) __THROW
230 ATTRIBUTE_SECTION(google_malloc);
231 void* tc_valloc(size_t __size) __THROW
232 ATTRIBUTE_SECTION(google_malloc);
233 void* tc_pvalloc(size_t __size) __THROW
234 ATTRIBUTE_SECTION(google_malloc);
235
236 void tc_malloc_stats(void) __THROW
237 ATTRIBUTE_SECTION(google_malloc);
238 int tc_mallopt(int cmd, int value) __THROW
239 ATTRIBUTE_SECTION(google_malloc);
240#ifdef HAVE_STRUCT_MALLINFO
241 struct mallinfo tc_mallinfo(void) __THROW
242 ATTRIBUTE_SECTION(google_malloc);
243#endif
244
245 void* tc_new(size_t size)
246 ATTRIBUTE_SECTION(google_malloc);
247 void tc_delete(void* p) __THROW
248 ATTRIBUTE_SECTION(google_malloc);
249 void* tc_newarray(size_t size)
250 ATTRIBUTE_SECTION(google_malloc);
251 void tc_deletearray(void* p) __THROW
252 ATTRIBUTE_SECTION(google_malloc);
253
254 // And the nothrow variants of these:
255 void* tc_new_nothrow(size_t size, const std::nothrow_t&) __THROW
256 ATTRIBUTE_SECTION(google_malloc);
257 void* tc_newarray_nothrow(size_t size, const std::nothrow_t&) __THROW
258 ATTRIBUTE_SECTION(google_malloc);
259 // Surprisingly, standard C++ library implementations use a
260 // nothrow-delete internally. See, eg:
261 // http://www.dinkumware.com/manuals/?manual=compleat&page=new.html
262 void tc_delete_nothrow(void* ptr, const std::nothrow_t&) __THROW
263 ATTRIBUTE_SECTION(google_malloc);
264 void tc_deletearray_nothrow(void* ptr, const std::nothrow_t&) __THROW
265 ATTRIBUTE_SECTION(google_malloc);
266
267 // Some non-standard extensions that we support.
268
269 // This is equivalent to
270 // OS X: malloc_size()
271 // glibc: malloc_usable_size()
272 // Windows: _msize()
273 size_t tc_malloc_size(void* p) __THROW
274 ATTRIBUTE_SECTION(google_malloc);
275} // extern "C"
276#endif // #ifndef _WIN32
277
278// ----------------------- IMPLEMENTATION -------------------------------
279
280static int tc_new_mode = 0; // See tc_set_new_mode().
281
282// Routines such as free() and realloc() catch some erroneous pointers
283// passed to them and invoke the handlers below when they do. (An erroneous
284// pointer won't be caught if it lies within a valid span, or within a stale
285// span for which the pagemap cache has a non-zero sizeclass.) This is a
286// cheap (source-editing required) form of exception handling for these routines.
287namespace {
288void InvalidFree(void* ptr) {
289 Log(kCrash, __FILE__, __LINE__, "Attempt to free invalid pointer", ptr);
290}
291
292size_t InvalidGetSizeForRealloc(const void* old_ptr) {
293 Log(kCrash, __FILE__, __LINE__,
294 "Attempt to realloc invalid pointer", old_ptr);
295 return 0;
296}
297
298size_t InvalidGetAllocatedSize(const void* ptr) {
299 Log(kCrash, __FILE__, __LINE__,
300 "Attempt to get the size of an invalid pointer", ptr);
301 return 0;
302}
303} // unnamed namespace
304
305// Extract interesting stats
306struct TCMallocStats {
307 uint64_t thread_bytes; // Bytes in thread caches
308 uint64_t central_bytes; // Bytes in central cache
309 uint64_t transfer_bytes; // Bytes in central transfer cache
310 uint64_t metadata_bytes; // Bytes alloced for metadata
311 PageHeap::Stats pageheap; // Stats from page heap
312};
313
314// Get stats into "r". Also, if class_count != NULL, class_count[k]
315// will be set to the total number of objects of size class k in the
316// central cache, transfer cache, and per-thread caches. If small_spans
317// is non-NULL, it is filled. Same for large_spans.
318static void ExtractStats(TCMallocStats* r, uint64_t* class_count,
319 PageHeap::SmallSpanStats* small_spans,
320 PageHeap::LargeSpanStats* large_spans) {
321 r->central_bytes = 0;
322 r->transfer_bytes = 0;
323 for (int cl = 0; cl < kNumClasses; ++cl) {
324 const int length = Static::central_cache()[cl].length();
325 const int tc_length = Static::central_cache()[cl].tc_length();
326 const size_t cache_overhead = Static::central_cache()[cl].OverheadBytes();
327 const size_t size = static_cast<uint64_t>(
328 Static::sizemap()->ByteSizeForClass(cl));
329 r->central_bytes += (size * length) + cache_overhead;
330 r->transfer_bytes += (size * tc_length);
331 if (class_count) {
332 // Sum the lengths of all per-class freelists, except the per-thread
333 // freelists, which get counted when we call GetThreadStats(), below.
334 class_count[cl] = length + tc_length;
335 }
336
337 }
338
339 // Add stats from per-thread heaps
340 r->thread_bytes = 0;
341 { // scope
342 SpinLockHolder h(Static::pageheap_lock());
343 ThreadCache::GetThreadStats(&r->thread_bytes, class_count);
344 r->metadata_bytes = tcmalloc::metadata_system_bytes();
345 r->pageheap = Static::pageheap()->stats();
346 if (small_spans != NULL) {
347 Static::pageheap()->GetSmallSpanStats(small_spans);
348 }
349 if (large_spans != NULL) {
350 Static::pageheap()->GetLargeSpanStats(large_spans);
351 }
352 }
353}
354
355static double PagesToMiB(uint64_t pages) {
356 return (pages << kPageShift) / 1048576.0;
357}
358
359// WRITE stats to "out"
360static void DumpStats(TCMalloc_Printer* out, int level) {
361 TCMallocStats stats;
362 uint64_t class_count[kNumClasses];
363 PageHeap::SmallSpanStats small;
364 PageHeap::LargeSpanStats large;
365 if (level >= 2) {
366 ExtractStats(&stats, class_count, &small, &large);
367 } else {
368 ExtractStats(&stats, NULL, NULL, NULL);
369 }
370
371 static const double MiB = 1048576.0;
372
373 const uint64_t virtual_memory_used = (stats.pageheap.system_bytes
374 + stats.metadata_bytes);
375 const uint64_t physical_memory_used = (virtual_memory_used
376 - stats.pageheap.unmapped_bytes);
377 const uint64_t bytes_in_use_by_app = (physical_memory_used
378 - stats.metadata_bytes
379 - stats.pageheap.free_bytes
380 - stats.central_bytes
381 - stats.transfer_bytes
382 - stats.thread_bytes);
383
384#ifdef TCMALLOC_SMALL_BUT_SLOW
385 out->printf(
386 "NOTE: SMALL MEMORY MODEL IS IN USE, PERFORMANCE MAY SUFFER.\n");
387#endif
388 out->printf(
389 "------------------------------------------------\n"
390 "MALLOC: %12" PRIu64 " (%7.1f MiB) Bytes in use by application\n"
391 "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in page heap freelist\n"
392 "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in central cache freelist\n"
393 "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in transfer cache freelist\n"
394 "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in thread cache freelists\n"
395 "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes in malloc metadata\n"
396 "MALLOC: ------------\n"
397 "MALLOC: = %12" PRIu64 " (%7.1f MiB) Actual memory used (physical + swap)\n"
398 "MALLOC: + %12" PRIu64 " (%7.1f MiB) Bytes released to OS (aka unmapped)\n"
399 "MALLOC: ------------\n"
400 "MALLOC: = %12" PRIu64 " (%7.1f MiB) Virtual address space used\n"
401 "MALLOC:\n"
402 "MALLOC: %12" PRIu64 " Spans in use\n"
403 "MALLOC: %12" PRIu64 " Thread heaps in use\n"
404 "MALLOC: %12" PRIu64 " Tcmalloc page size\n"
405 "------------------------------------------------\n"
406 "Call ReleaseFreeMemory() to release freelist memory to the OS"
407 " (via madvise()).\n"
408 "Bytes released to the OS take up virtual address space"
409 " but no physical memory.\n",
410 bytes_in_use_by_app, bytes_in_use_by_app / MiB,
411 stats.pageheap.free_bytes, stats.pageheap.free_bytes / MiB,
412 stats.central_bytes, stats.central_bytes / MiB,
413 stats.transfer_bytes, stats.transfer_bytes / MiB,
414 stats.thread_bytes, stats.thread_bytes / MiB,
415 stats.metadata_bytes, stats.metadata_bytes / MiB,
416 physical_memory_used, physical_memory_used / MiB,
417 stats.pageheap.unmapped_bytes, stats.pageheap.unmapped_bytes / MiB,
418 virtual_memory_used, virtual_memory_used / MiB,
419 uint64_t(Static::span_allocator()->inuse()),
420 uint64_t(ThreadCache::HeapsInUse()),
421 uint64_t(kPageSize));
422
423 if (level >= 2) {
424 out->printf("------------------------------------------------\n");
425 out->printf("Total size of freelists for per-thread caches,\n");
426 out->printf("transfer cache, and central cache, by size class\n");
427 out->printf("------------------------------------------------\n");
428 uint64_t cumulative = 0;
429 for (int cl = 0; cl < kNumClasses; ++cl) {
430 if (class_count[cl] > 0) {
431 uint64_t class_bytes =
432 class_count[cl] * Static::sizemap()->ByteSizeForClass(cl);
433 cumulative += class_bytes;
434 out->printf("class %3d [ %8" PRIuS " bytes ] : "
435 "%8" PRIu64 " objs; %5.1f MiB; %5.1f cum MiB\n",
436 cl, Static::sizemap()->ByteSizeForClass(cl),
437 class_count[cl],
438 class_bytes / MiB,
439 cumulative / MiB);
440 }
441 }
442
443 // append page heap info
444 int nonempty_sizes = 0;
445 for (int s = 0; s < kMaxPages; s++) {
446 if (small.normal_length[s] + small.returned_length[s] > 0) {
447 nonempty_sizes++;
448 }
449 }
450 out->printf("------------------------------------------------\n");
451 out->printf("PageHeap: %d sizes; %6.1f MiB free; %6.1f MiB unmapped\n",
452 nonempty_sizes, stats.pageheap.free_bytes / MiB,
453 stats.pageheap.unmapped_bytes / MiB);
454 out->printf("------------------------------------------------\n");
455 uint64_t total_normal = 0;
456 uint64_t total_returned = 0;
457 for (int s = 0; s < kMaxPages; s++) {
458 const int n_length = small.normal_length[s];
459 const int r_length = small.returned_length[s];
460 if (n_length + r_length > 0) {
461 uint64_t n_pages = s * n_length;
462 uint64_t r_pages = s * r_length;
463 total_normal += n_pages;
464 total_returned += r_pages;
465 out->printf("%6u pages * %6u spans ~ %6.1f MiB; %6.1f MiB cum"
466 "; unmapped: %6.1f MiB; %6.1f MiB cum\n",
467 s,
468 (n_length + r_length),
469 PagesToMiB(n_pages + r_pages),
470 PagesToMiB(total_normal + total_returned),
471 PagesToMiB(r_pages),
472 PagesToMiB(total_returned));
473 }
474 }
475
476 total_normal += large.normal_pages;
477 total_returned += large.returned_pages;
478 out->printf(">255 large * %6u spans ~ %6.1f MiB; %6.1f MiB cum"
479 "; unmapped: %6.1f MiB; %6.1f MiB cum\n",
480 static_cast<unsigned int>(large.spans),
481 PagesToMiB(large.normal_pages + large.returned_pages),
482 PagesToMiB(total_normal + total_returned),
483 PagesToMiB(large.returned_pages),
484 PagesToMiB(total_returned));
485 }
486}
487
488static void PrintStats(int level) {
489 const int kBufferSize = 16 << 10;
490 char* buffer = new char[kBufferSize];
491 TCMalloc_Printer printer(buffer, kBufferSize);
492 DumpStats(&printer, level);
493 write(STDERR_FILENO, buffer, strlen(buffer));
494 delete[] buffer;
495}
496
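// Reading the code below, each growth record in the returned array is laid
// out as follows (editorial summary):
//
//   result[i+0] = 1          // a literal 1
//   result[i+1] = size       // bytes requested when the heap grew
//   result[i+2] = depth      // number of PCs that follow
//   result[i+3 .. i+2+depth] // the stack PCs
//
// and the whole list is terminated by a single 0 entry.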
497static void** DumpHeapGrowthStackTraces() {
498 // Count how much space we need
499 int needed_slots = 0;
500 {
501 SpinLockHolder h(Static::pageheap_lock());
502 for (StackTrace* t = Static::growth_stacks();
503 t != NULL;
504 t = reinterpret_cast<StackTrace*>(
505 t->stack[tcmalloc::kMaxStackDepth-1])) {
506 needed_slots += 3 + t->depth;
507 }
508 needed_slots += 100; // Slop in case list grows
509 needed_slots += needed_slots/8; // An extra 12.5% slop
510 }
511
512 void** result = new void*[needed_slots];
513 if (result == NULL) {
514 Log(kLog, __FILE__, __LINE__,
515 "tcmalloc: allocation failed for stack trace slots",
516 needed_slots * sizeof(*result));
517 return NULL;
518 }
519
520 SpinLockHolder h(Static::pageheap_lock());
521 int used_slots = 0;
522 for (StackTrace* t = Static::growth_stacks();
523 t != NULL;
524 t = reinterpret_cast<StackTrace*>(
525 t->stack[tcmalloc::kMaxStackDepth-1])) {
526 ASSERT(used_slots < needed_slots); // Need to leave room for terminator
527 if (used_slots + 3 + t->depth >= needed_slots) {
528 // No more room
529 break;
530 }
531
532 result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1));
533 result[used_slots+1] = reinterpret_cast<void*>(t->size);
534 result[used_slots+2] = reinterpret_cast<void*>(t->depth);
535 for (int d = 0; d < t->depth; d++) {
536 result[used_slots+3+d] = t->stack[d];
537 }
538 used_slots += 3 + t->depth;
539 }
540 result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0));
541 return result;
542}
543
544static void IterateOverRanges(void* arg, MallocExtension::RangeFunction func) {
545 PageID page = 1; // Some code may assume that page==0 is never used
546 bool done = false;
547 while (!done) {
548 // Accumulate a small number of ranges in a local buffer
549 static const int kNumRanges = 16;
550 static base::MallocRange ranges[kNumRanges];
551 int n = 0;
552 {
553 SpinLockHolder h(Static::pageheap_lock());
554 while (n < kNumRanges) {
555 if (!Static::pageheap()->GetNextRange(page, &ranges[n])) {
556 done = true;
557 break;
558 } else {
559 uintptr_t limit = ranges[n].address + ranges[n].length;
560 page = (limit + kPageSize - 1) >> kPageShift;
561 n++;
562 }
563 }
564 }
565
566 for (int i = 0; i < n; i++) {
567 (*func)(arg, &ranges[i]);
568 }
569 }
570}
571
572// TCMalloc's support for extra malloc interfaces
573class TCMallocImplementation : public MallocExtension {
574 private:
575 // ReleaseToSystem() might release more than the requested bytes because
576 // the page heap releases at the span granularity, and spans are of wildly
577 // different sizes. This member keeps track of the extra bytes
578 // released so that the app can periodically call ReleaseToSystem() to
579 // release memory at a constant rate.
580 // NOTE: Protected by Static::pageheap_lock().
581 size_t extra_bytes_released_;
582
583 public:
584 TCMallocImplementation()
585 : extra_bytes_released_(0) {
586 }
587
588 virtual void GetStats(char* buffer, int buffer_length) {
589 ASSERT(buffer_length > 0);
590 TCMalloc_Printer printer(buffer, buffer_length);
591
592 // Print level one stats unless lots of space is available
593 if (buffer_length < 10000) {
594 DumpStats(&printer, 1);
595 } else {
596 DumpStats(&printer, 2);
597 }
598 }
599
600 // We may print an extra, tcmalloc-specific warning message here.
601 virtual void GetHeapSample(MallocExtensionWriter* writer) {
602 if (FLAGS_tcmalloc_sample_parameter == 0) {
603 const char* const kWarningMsg =
604 "%warn\n"
605 "%warn This heap profile does not have any data in it, because\n"
606 "%warn the application was run with heap sampling turned off.\n"
607 "%warn To get useful data from GetHeapSample(), you must\n"
608 "%warn set the environment variable TCMALLOC_SAMPLE_PARAMETER to\n"
609 "%warn a positive sampling period, such as 524288.\n"
610 "%warn\n";
611 writer->append(kWarningMsg, strlen(kWarningMsg));
612 }
613 MallocExtension::GetHeapSample(writer);
614 }
615
616 virtual void** ReadStackTraces(int* sample_period) {
617 tcmalloc::StackTraceTable table;
618 {
619 SpinLockHolder h(Static::pageheap_lock());
620 Span* sampled = Static::sampled_objects();
621 for (Span* s = sampled->next; s != sampled; s = s->next) {
622 table.AddTrace(*reinterpret_cast<StackTrace*>(s->objects));
623 }
624 }
625 *sample_period = ThreadCache::GetCache()->GetSamplePeriod();
626 return table.ReadStackTracesAndClear(); // grabs and releases pageheap_lock
627 }
628
629 virtual void** ReadHeapGrowthStackTraces() {
630 return DumpHeapGrowthStackTraces();
631 }
632
633 virtual void Ranges(void* arg, RangeFunction func) {
634 IterateOverRanges(arg, func);
635 }
636
637 virtual bool GetNumericProperty(const char* name, size_t* value) {
638 ASSERT(name != NULL);
639
640 if (strcmp(name, "generic.current_allocated_bytes") == 0) {
641 TCMallocStats stats;
642 ExtractStats(&stats, NULL, NULL, NULL);
643 *value = stats.pageheap.system_bytes
644 - stats.thread_bytes
645 - stats.central_bytes
646 - stats.transfer_bytes
647 - stats.pageheap.free_bytes
648 - stats.pageheap.unmapped_bytes;
649 return true;
650 }
651
652 if (strcmp(name, "generic.heap_size") == 0) {
653 TCMallocStats stats;
654 ExtractStats(&stats, NULL, NULL, NULL);
655 *value = stats.pageheap.system_bytes;
656 return true;
657 }
658
659 if (strcmp(name, "tcmalloc.slack_bytes") == 0) {
660 // Kept for backwards compatibility. Now defined externally as:
661 // pageheap_free_bytes + pageheap_unmapped_bytes.
662 SpinLockHolder l(Static::pageheap_lock());
663 PageHeap::Stats stats = Static::pageheap()->stats();
664 *value = stats.free_bytes + stats.unmapped_bytes;
665 return true;
666 }
667
668 if (strcmp(name, "tcmalloc.central_cache_free_bytes") == 0) {
669 TCMallocStats stats;
670 ExtractStats(&stats, NULL, NULL, NULL);
671 *value = stats.central_bytes;
672 return true;
673 }
674
675 if (strcmp(name, "tcmalloc.transfer_cache_free_bytes") == 0) {
676 TCMallocStats stats;
677 ExtractStats(&stats, NULL, NULL, NULL);
678 *value = stats.transfer_bytes;
679 return true;
680 }
681
682 if (strcmp(name, "tcmalloc.thread_cache_free_bytes") == 0) {
683 TCMallocStats stats;
684 ExtractStats(&stats, NULL, NULL, NULL);
685 *value = stats.thread_bytes;
686 return true;
687 }
688
689 if (strcmp(name, "tcmalloc.pageheap_free_bytes") == 0) {
690 SpinLockHolder l(Static::pageheap_lock());
691 *value = Static::pageheap()->stats().free_bytes;
692 return true;
693 }
694
695 if (strcmp(name, "tcmalloc.pageheap_unmapped_bytes") == 0) {
696 SpinLockHolder l(Static::pageheap_lock());
697 *value = Static::pageheap()->stats().unmapped_bytes;
698 return true;
699 }
700
701 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
702 SpinLockHolder l(Static::pageheap_lock());
703 *value = ThreadCache::overall_thread_cache_size();
704 return true;
705 }
706
707 if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) {
708 TCMallocStats stats;
709 ExtractStats(&stats, NULL, NULL, NULL);
710 *value = stats.thread_bytes;
711 return true;
712 }
713
714 if (strcmp(name, "tcmalloc.aggressive_memory_decommit") == 0) {
715 *value = size_t(Static::pageheap()->GetAggressiveDecommit());
716 return true;
717 }
718
719 return false;
720 }
721
722 virtual bool SetNumericProperty(const char* name, size_t value) {
723 ASSERT(name != NULL);
724
725 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
726 SpinLockHolder l(Static::pageheap_lock());
727 ThreadCache::set_overall_thread_cache_size(value);
728 return true;
729 }
730
731 if (strcmp(name, "tcmalloc.aggressive_memory_decommit") == 0) {
732 Static::pageheap()->SetAggressiveDecommit(value != 0);
733 return true;
734 }
735
736 return false;
737 }
738
739 virtual void MarkThreadIdle() {
740 ThreadCache::BecomeIdle();
741 }
742
743 virtual void MarkThreadBusy(); // Implemented below
744
745 virtual SysAllocator* GetSystemAllocator() {
746 SpinLockHolder h(Static::pageheap_lock());
747 return sys_alloc;
748 }
749
750 virtual void SetSystemAllocator(SysAllocator* alloc) {
751 SpinLockHolder h(Static::pageheap_lock());
752 sys_alloc = alloc;
753 }
754
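  // A worked example of the bookkeeping in ReleaseToSystem() below
  // (editorial sketch, assuming the common 8 KiB pages, i.e.
  // kPageShift == 13, and that a free page is available to release):
  //
  //   ReleaseToSystem(4096);   // releases one 8 KiB page;
  //                            // extra_bytes_released_ becomes 4096
  //   ReleaseToSystem(4096);   // 4096 <= extra_bytes_released_, so nothing
  //                            // is released and the credit drops to 0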
755 virtual void ReleaseToSystem(size_t num_bytes) {
756 SpinLockHolder h(Static::pageheap_lock());
757 if (num_bytes <= extra_bytes_released_) {
758 // We released too much on a prior call, so don't release any
759 // more this time.
760 extra_bytes_released_ = extra_bytes_released_ - num_bytes;
761 return;
762 }
763 num_bytes = num_bytes - extra_bytes_released_;
764 // num_bytes might be less than one page. If we pass zero to
765 // ReleaseAtLeastNPages, it won't do anything, so we release a whole
766 // page now and let extra_bytes_released_ smooth it out over time.
767 Length num_pages = max<Length>(num_bytes >> kPageShift, 1);
768 size_t bytes_released = Static::pageheap()->ReleaseAtLeastNPages(
769 num_pages) << kPageShift;
770 if (bytes_released > num_bytes) {
771 extra_bytes_released_ = bytes_released - num_bytes;
772 } else {
773 // The PageHeap wasn't able to release num_bytes. Don't try to
774 // compensate with a big release next time. Specifically,
775 // ReleaseFreeMemory() calls ReleaseToSystem(LONG_MAX).
776 extra_bytes_released_ = 0;
777 }
778 }
779
780 virtual void SetMemoryReleaseRate(double rate) {
781 FLAGS_tcmalloc_release_rate = rate;
782 }
783
784 virtual double GetMemoryReleaseRate() {
785 return FLAGS_tcmalloc_release_rate;
786 }
787 virtual size_t GetEstimatedAllocatedSize(size_t size) {
788 if (size <= kMaxSize) {
789 const size_t cl = Static::sizemap()->SizeClass(size);
790 const size_t alloc_size = Static::sizemap()->ByteSizeForClass(cl);
791 return alloc_size;
792 } else {
793 return tcmalloc::pages(size) << kPageShift;
794 }
795 }
796
797 // This just calls GetSizeWithCallback, but because that's in an
798 // unnamed namespace, we need to move the definition below it in the
799 // file.
800 virtual size_t GetAllocatedSize(const void* ptr);
801
802 // This duplicates some of the logic in GetSizeWithCallback, but is
803 // faster. This is important on OS X, where this function is called
804 // on every allocation operation.
805 virtual Ownership GetOwnership(const void* ptr) {
806 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
807 // The rest of tcmalloc assumes that all allocated pointers use at
808 // most kAddressBits bits. If ptr doesn't, then it definitely
809// wasn't allocated by tcmalloc.
810 if ((p >> (kAddressBits - kPageShift)) > 0) {
811 return kNotOwned;
812 }
813 size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
814 if (cl != 0) {
815 return kOwned;
816 }
817 const Span *span = Static::pageheap()->GetDescriptor(p);
818 return span ? kOwned : kNotOwned;
819 }
820
821 virtual void GetFreeListSizes(vector<MallocExtension::FreeListInfo>* v) {
822 static const char* kCentralCacheType = "tcmalloc.central";
823 static const char* kTransferCacheType = "tcmalloc.transfer";
824 static const char* kThreadCacheType = "tcmalloc.thread";
825 static const char* kPageHeapType = "tcmalloc.page";
826 static const char* kPageHeapUnmappedType = "tcmalloc.page_unmapped";
827 static const char* kLargeSpanType = "tcmalloc.large";
828 static const char* kLargeUnmappedSpanType = "tcmalloc.large_unmapped";
829
830 v->clear();
831
832 // central class information
833 int64 prev_class_size = 0;
834 for (int cl = 1; cl < kNumClasses; ++cl) {
835 size_t class_size = Static::sizemap()->ByteSizeForClass(cl);
836 MallocExtension::FreeListInfo i;
837 i.min_object_size = prev_class_size + 1;
838 i.max_object_size = class_size;
839 i.total_bytes_free =
840 Static::central_cache()[cl].length() * class_size;
841 i.type = kCentralCacheType;
842 v->push_back(i);
843
844 // transfer cache
845 i.total_bytes_free =
846 Static::central_cache()[cl].tc_length() * class_size;
847 i.type = kTransferCacheType;
848 v->push_back(i);
849
850 prev_class_size = Static::sizemap()->ByteSizeForClass(cl);
851 }
852
853 // Add stats from per-thread heaps
854 uint64_t class_count[kNumClasses];
855 memset(class_count, 0, sizeof(class_count));
856 {
857 SpinLockHolder h(Static::pageheap_lock());
858 uint64_t thread_bytes = 0;
859 ThreadCache::GetThreadStats(&thread_bytes, class_count);
860 }
861
862 prev_class_size = 0;
863 for (int cl = 1; cl < kNumClasses; ++cl) {
864 MallocExtension::FreeListInfo i;
865 i.min_object_size = prev_class_size + 1;
866 i.max_object_size = Static::sizemap()->ByteSizeForClass(cl);
867 i.total_bytes_free =
868 class_count[cl] * Static::sizemap()->ByteSizeForClass(cl);
869 i.type = kThreadCacheType;
870 v->push_back(i);
871 }
872
873 // append page heap info
874 PageHeap::SmallSpanStats small;
875 PageHeap::LargeSpanStats large;
876 {
877 SpinLockHolder h(Static::pageheap_lock());
878 Static::pageheap()->GetSmallSpanStats(&small);
879 Static::pageheap()->GetLargeSpanStats(&large);
880 }
881
882 // large spans: mapped
883 MallocExtension::FreeListInfo span_info;
884 span_info.type = kLargeSpanType;
885 span_info.max_object_size = (numeric_limits<size_t>::max)();
886 span_info.min_object_size = kMaxPages << kPageShift;
887 span_info.total_bytes_free = large.normal_pages << kPageShift;
888 v->push_back(span_info);
889
890 // large spans: unmapped
891 span_info.type = kLargeUnmappedSpanType;
892 span_info.total_bytes_free = large.returned_pages << kPageShift;
893 v->push_back(span_info);
894
895 // small spans
896 for (int s = 1; s < kMaxPages; s++) {
897 MallocExtension::FreeListInfo i;
898 i.max_object_size = (s << kPageShift);
899 i.min_object_size = ((s - 1) << kPageShift);
900
901 i.type = kPageHeapType;
902 i.total_bytes_free = (s << kPageShift) * small.normal_length[s];
903 v->push_back(i);
904
905 i.type = kPageHeapUnmappedType;
906 i.total_bytes_free = (s << kPageShift) * small.returned_length[s];
907 v->push_back(i);
908 }
909 }
910};
911
912// The constructor allocates an object to ensure that initialization
913// runs before main(), and therefore we do not have a chance to become
914// multi-threaded before initialization. We also create the TSD key
915// here. Presumably by the time this constructor runs, glibc is in
916// good enough shape to handle pthread_key_create().
917//
918// The constructor also takes the opportunity to tell STL to use
919// tcmalloc. We want to do this early, before user objects are constructed, so
920// all user STL allocations go through tcmalloc (which works really
921// well for STL).
922//
923// The destructor prints stats when the program exits.
924static int tcmallocguard_refcount = 0; // no lock needed: runs before main()
925TCMallocGuard::TCMallocGuard() {
926 if (tcmallocguard_refcount++ == 0) {
927 ReplaceSystemAlloc(); // defined in libc_override_*.h
928 tc_free(tc_malloc(1));
929 ThreadCache::InitTSD();
930 tc_free(tc_malloc(1));
931 // Either we, or debugallocation.cc, or valgrind will control memory
932 // management. We register our extension if we're the winner.
933#ifdef TCMALLOC_USING_DEBUGALLOCATION
934 // Let debugallocation register its extension.
935#else
936 if (RunningOnValgrind()) {
937 // Let Valgrind use its own malloc (so don't register our extension).
938 } else {
939 MallocExtension::Register(new TCMallocImplementation);
940 }
941#endif
942 }
943}
944
945TCMallocGuard::~TCMallocGuard() {
946 if (--tcmallocguard_refcount == 0) {
947 const char* env = NULL;
948 if (!RunningOnValgrind()) {
949 // Valgrind uses its own malloc, so we cannot do MALLOCSTATS
950 env = getenv("MALLOCSTATS");
951 }
952 if (env != NULL) {
953 int level = atoi(env);
954 if (level < 1) level = 1;
955 PrintStats(level);
956 }
957 }
958}
959#ifndef WIN32_OVERRIDE_ALLOCATORS
960static TCMallocGuard module_enter_exit_hook;
961#endif
962
963//-------------------------------------------------------------------
964// Helpers for the exported routines below
965//-------------------------------------------------------------------
966
967static inline bool CheckCachedSizeClass(void *ptr) {
968 PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
969 size_t cached_value = Static::pageheap()->GetSizeClassIfCached(p);
970 return cached_value == 0 ||
971 cached_value == Static::pageheap()->GetDescriptor(p)->sizeclass;
972}
973
974static inline void* CheckedMallocResult(void *result) {
975 ASSERT(result == NULL || CheckCachedSizeClass(result));
976 return result;
977}
978
979static inline void* SpanToMallocResult(Span *span) {
980 Static::pageheap()->CacheSizeClass(span->start, 0);
981 return
982 CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
983}
984
985static void* DoSampledAllocation(size_t size) {
986 // Grab the stack trace outside the heap lock
987 StackTrace tmp;
988 tmp.depth = GetStackTrace(tmp.stack, tcmalloc::kMaxStackDepth, 1);
989 tmp.size = size;
990
991 SpinLockHolder h(Static::pageheap_lock());
992 // Allocate span
993 Span *span = Static::pageheap()->New(tcmalloc::pages(size == 0 ? 1 : size));
994 if (UNLIKELY(span == NULL)) {
995 return NULL;
996 }
997
998 // Allocate stack trace
999 StackTrace *stack = Static::stacktrace_allocator()->New();
1000 if (UNLIKELY(stack == NULL)) {
1001 // Sampling failed because of lack of memory
1002 return span;
1003 }
1004 *stack = tmp;
1005 span->sample = 1;
1006 span->objects = stack;
1007 tcmalloc::DLL_Prepend(Static::sampled_objects(), span);
1008
1009 return SpanToMallocResult(span);
1010}
1011
1012namespace {
1013
1014typedef void* (*malloc_fn)(void *arg);
1015
1016SpinLock set_new_handler_lock(SpinLock::LINKER_INITIALIZED);
1017
1018void* handle_oom(malloc_fn retry_fn,
1019 void* retry_arg,
1020 bool from_operator,
1021 bool nothrow) {
1022 if (!from_operator && !tc_new_mode) {
1023 // We're out of memory in a C library function (malloc, etc.) and no
1024 // "new mode" has been forced on us. Just return NULL.
1025 return NULL;
1026 }
1027 // We're OOM in operator new, or "new mode" is set. We might have to
1028 // call the new_handler and maybe retry the allocation.
1029
1030 for (;;) {
1031 // Get the current new handler. NB: this function is not
1032 // thread-safe. We make a feeble stab at making it so here, but
1033 // this lock only protects against tcmalloc interfering with
1034 // itself, not with other libraries calling set_new_handler.
1035 std::new_handler nh;
1036 {
1037 SpinLockHolder h(&set_new_handler_lock);
1038 nh = std::set_new_handler(0);
1039 (void) std::set_new_handler(nh);
1040 }
1041#if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS)
1042 if (!nh) {
1043 return NULL;
1044 }
1045 // Since exceptions are disabled, we don't really know if new_handler
1046 // failed. Assume it will abort if it fails.
1047 (*nh)();
1048#else
1049 // If no new_handler is established, the allocation failed.
1050 if (!nh) {
1051 if (nothrow) {
1052 return NULL;
1053 }
1054 throw std::bad_alloc();
1055 }
1056 // Otherwise, try the new_handler. If it returns, retry the
1057 // allocation. If it throws std::bad_alloc, fail the allocation.
1058 // if it throws something else, don't interfere.
1059 try {
1060 (*nh)();
1061 } catch (const std::bad_alloc&) {
1062 if (!nothrow) throw;
1063 return NULL;
1064 }
1065#endif // (defined(__GNUC__) && !defined(__EXCEPTIONS)) || (defined(_HAS_EXCEPTIONS) && !_HAS_EXCEPTIONS)
1066
1067 // we get here if new_handler returns successfully. So we retry
1068 // allocation.
1069 void* rv = retry_fn(retry_arg);
1070 if (rv != NULL) {
1071 return rv;
1072 }
1073
1074 // if allocation failed again we go to next loop iteration
1075 }
1076}
1077
1078// Copy of FLAGS_tcmalloc_large_alloc_report_threshold with
1079// automatic increases factored in.
1080static int64_t large_alloc_threshold =
1081 (kPageSize > FLAGS_tcmalloc_large_alloc_report_threshold
1082 ? kPageSize : FLAGS_tcmalloc_large_alloc_report_threshold);
1083
1084static void ReportLargeAlloc(Length num_pages, void* result) {
1085 StackTrace stack;
1086 stack.depth = GetStackTrace(stack.stack, tcmalloc::kMaxStackDepth, 1);
1087
1088 static const int N = 1000;
1089 char buffer[N];
1090 TCMalloc_Printer printer(buffer, N);
1091 printer.printf("tcmalloc: large alloc %" PRIu64 " bytes == %p @ ",
1092 static_cast<uint64>(num_pages) << kPageShift,
1093 result);
1094 for (int i = 0; i < stack.depth; i++) {
1095 printer.printf(" %p", stack.stack[i]);
1096 }
1097 printer.printf("\n");
1098 write(STDERR_FILENO, buffer, strlen(buffer));
1099}
1100
1101void* do_memalign(size_t align, size_t size);
1102
1103struct retry_memalign_data {
1104 size_t align;
1105 size_t size;
1106};
1107
1108static void *retry_do_memalign(void *arg) {
1109 retry_memalign_data *data = static_cast<retry_memalign_data *>(arg);
1110 return do_memalign(data->align, data->size);
1111}
1112
1113static void *maybe_do_cpp_memalign_slow(size_t align, size_t size) {
1114 retry_memalign_data data;
1115 data.align = align;
1116 data.size = size;
1117 return handle_oom(retry_do_memalign, &data,
1118 false, true);
1119}
1120
1121inline void* do_memalign_or_cpp_memalign(size_t align, size_t size) {
1122 void *rv = do_memalign(align, size);
1123 if (LIKELY(rv != NULL)) {
1124 return rv;
1125 }
1126 return maybe_do_cpp_memalign_slow(align, size);
1127}
1128
1129// Must be called with the page lock held.
1130inline bool should_report_large(Length num_pages) {
1131 const int64 threshold = large_alloc_threshold;
1132 if (threshold > 0 && num_pages >= (threshold >> kPageShift)) {
1133 // Increase the threshold by 1/8 every time we generate a report.
1134 // We cap the threshold at 8GiB to avoid overflow problems.
1135 large_alloc_threshold = (threshold + threshold/8 < 8ll<<30
1136 ? threshold + threshold/8 : 8ll<<30);
1137 return true;
1138 }
1139 return false;
1140}
1141
1142// Helper for do_malloc().
1143inline void* do_malloc_pages(ThreadCache* heap, size_t size) {
1144 void* result;
1145 bool report_large;
1146
1147 Length num_pages = tcmalloc::pages(size);
1148 size = num_pages << kPageShift;
1149
1150 if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
1151 result = DoSampledAllocation(size);
1152
1153 SpinLockHolder h(Static::pageheap_lock());
1154 report_large = should_report_large(num_pages);
1155 } else {
1156 SpinLockHolder h(Static::pageheap_lock());
1157 Span* span = Static::pageheap()->New(num_pages);
1158 result = (UNLIKELY(span == NULL) ? NULL : SpanToMallocResult(span));
1159 report_large = should_report_large(num_pages);
1160 }
1161
1162 if (report_large) {
1163 ReportLargeAlloc(num_pages, result);
1164 }
1165 return result;
1166}
1167
1168ALWAYS_INLINE void* do_malloc_small(ThreadCache* heap, size_t size) {
1169 ASSERT(Static::IsInited());
1170 ASSERT(heap != NULL);
1171 size_t cl = Static::sizemap()->SizeClass(size);
1172 size = Static::sizemap()->class_to_size(cl);
1173
1174 if (UNLIKELY(FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
1175 return DoSampledAllocation(size);
1176 } else {
1177 // The common case, and also the simplest. This just pops the
1178 // size-appropriate freelist, after replenishing it if it's empty.
1179 return CheckedMallocResult(heap->Allocate(size, cl));
1180 }
1181}
1182
1183ALWAYS_INLINE void* do_malloc(size_t size) {
1184 if (ThreadCache::have_tls &&
1185 LIKELY(size < ThreadCache::MinSizeForSlowPath())) {
1186 return do_malloc_small(ThreadCache::GetCacheWhichMustBePresent(), size);
1187 } else if (size <= kMaxSize) {
1188 return do_malloc_small(ThreadCache::GetCache(), size);
1189 } else {
1190 return do_malloc_pages(ThreadCache::GetCache(), size);
1191 }
1192}
1193
1194static void *retry_malloc(void* size) {
1195 return do_malloc(reinterpret_cast<size_t>(size));
1196}
1197
1198ALWAYS_INLINE void* do_malloc_or_cpp_alloc(size_t size) {
1199 void *rv = do_malloc(size);
1200 if (LIKELY(rv != NULL)) {
1201 return rv;
1202 }
1203 return handle_oom(retry_malloc, reinterpret_cast<void *>(size),
1204 false, true);
1205}
1206
1207ALWAYS_INLINE void* do_calloc(size_t n, size_t elem_size) {
1208 // Overflow check
1209 const size_t size = n * elem_size;
1210 if (elem_size != 0 && size / elem_size != n) return NULL;
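  // For example (editorial note), with a 32-bit size_t, n == 0x40000000 and
  // elem_size == 8 multiply to 0x200000000, which wraps to 0; since
  // 0 / 8 != n, the check above rejects the request instead of returning a
  // too-small block.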
1211
1212 void* result = do_malloc_or_cpp_alloc(size);
1213 if (result != NULL) {
1214 memset(result, 0, size);
1215 }
1216 return result;
1217}
1218
1219// If ptr is NULL, do nothing. Otherwise invoke the given function.
1220inline void free_null_or_invalid(void* ptr, void (*invalid_free_fn)(void*)) {
1221 if (ptr != NULL) {
1222 (*invalid_free_fn)(ptr);
1223 }
1224}
1225
1226// Helper for do_free_with_callback(), below. Inputs:
1227// ptr is object to be freed
1228// invalid_free_fn is a function that gets invoked on certain "bad frees"
1229// heap is the ThreadCache for this thread, or NULL if it isn't known
1230// heap_must_be_valid is whether heap is known to be non-NULL
1231//
1232// This function may only be used after Static::IsInited() is true.
1233//
1234// We can usually detect the case where ptr is not pointing to a page that
1235// tcmalloc is using, and in those cases we invoke invalid_free_fn.
1236//
1237// To maximize speed in the common case, we usually get here with
1238// heap_must_be_valid being a manifest constant equal to true.
1239ALWAYS_INLINE void do_free_helper(void* ptr,
1240 void (*invalid_free_fn)(void*),
1241 ThreadCache* heap,
1242 bool heap_must_be_valid) {
1243 ASSERT((Static::IsInited() && heap != NULL) || !heap_must_be_valid);
1244 if (!heap_must_be_valid && !Static::IsInited()) {
1245 // We called free() before malloc(). This can occur if the
1246 // (system) malloc() is called before tcmalloc is loaded, and then
1247 // free() is called after tcmalloc is loaded (and tc_free has
1248 // replaced free), but before the global constructor has run that
1249 // sets up the tcmalloc data structures.
1250 free_null_or_invalid(ptr, invalid_free_fn);
1251 return;
1252 }
1253 Span* span = NULL;
1254 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
1255 size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
1256 if (UNLIKELY(cl == 0)) {
1257 span = Static::pageheap()->GetDescriptor(p);
1258 if (UNLIKELY(!span)) {
1259 // span can be NULL because the pointer passed in is NULL or invalid
1260 // (not something returned by malloc or friends), or because the
1261 // pointer was allocated with some other allocator besides
1262 // tcmalloc. The latter can happen if tcmalloc is linked in via
1263 // a dynamic library, but is not listed last on the link line.
1264 // In that case, libraries after it on the link line will
1265 // allocate with libc malloc, but free with tcmalloc's free.
1266 free_null_or_invalid(ptr, invalid_free_fn);
1267 return;
1268 }
1269 cl = span->sizeclass;
1270 Static::pageheap()->CacheSizeClass(p, cl);
1271 }
1272 ASSERT(ptr != NULL);
1273 if (LIKELY(cl != 0)) {
1274 ASSERT(!Static::pageheap()->GetDescriptor(p)->sample);
1275 if (heap_must_be_valid || heap != NULL) {
1276 heap->Deallocate(ptr, cl);
1277 } else {
1278 // Delete directly into central cache
1279 tcmalloc::SLL_SetNext(ptr, NULL);
1280 Static::central_cache()[cl].InsertRange(ptr, ptr, 1);
1281 }
1282 } else {
1283 SpinLockHolder h(Static::pageheap_lock());
1284 ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
1285 ASSERT(span != NULL && span->start == p);
1286 if (span->sample) {
1287 StackTrace* st = reinterpret_cast<StackTrace*>(span->objects);
1288 tcmalloc::DLL_Remove(span);
1289 Static::stacktrace_allocator()->Delete(st);
1290 span->objects = NULL;
1291 }
1292 Static::pageheap()->Delete(span);
1293 }
1294}
1295
1296// Helper for the object deletion (free, delete, etc.). Inputs:
1297// ptr is object to be freed
1298// invalid_free_fn is a function that gets invoked on certain "bad frees"
1299//
1300// We can usually detect the case where ptr is not pointing to a page that
1301// tcmalloc is using, and in those cases we invoke invalid_free_fn.
1302ALWAYS_INLINE void do_free_with_callback(void* ptr,
1303 void (*invalid_free_fn)(void*)) {
1304 ThreadCache* heap = NULL;
1305 if (LIKELY(ThreadCache::IsFastPathAllowed())) {
1306 heap = ThreadCache::GetCacheWhichMustBePresent();
1307 do_free_helper(ptr, invalid_free_fn, heap, true);
1308 } else {
1309 heap = ThreadCache::GetCacheIfPresent();
1310 do_free_helper(ptr, invalid_free_fn, heap, false);
1311 }
1312}
1313
1314// The default "do_free" that uses the default callback.
1315ALWAYS_INLINE void do_free(void* ptr) {
1316 return do_free_with_callback(ptr, &InvalidFree);
1317}
1318
1319// NOTE: some logic here is duplicated in GetOwnership (above), for
1320// speed. If you change this function, look at that one too.
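// Note (editorial): the value returned here is the usable size, not the size
// originally requested -- the byte size of the object's size class for small
// allocations, or the whole span (span->length << kPageShift) for large ones.
// It can therefore be larger than the malloc() argument.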
1321inline size_t GetSizeWithCallback(const void* ptr,
1322 size_t (*invalid_getsize_fn)(const void*)) {
1323 if (ptr == NULL)
1324 return 0;
1325 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
1326 size_t cl = Static::pageheap()->GetSizeClassIfCached(p);
1327 if (cl != 0) {
1328 return Static::sizemap()->ByteSizeForClass(cl);
1329 } else {
1330 const Span *span = Static::pageheap()->GetDescriptor(p);
1331 if (UNLIKELY(span == NULL)) { // means we do not own this memory
1332 return (*invalid_getsize_fn)(ptr);
1333 } else if (span->sizeclass != 0) {
1334 Static::pageheap()->CacheSizeClass(p, span->sizeclass);
1335 return Static::sizemap()->ByteSizeForClass(span->sizeclass);
1336 } else {
1337 return span->length << kPageShift;
1338 }
1339 }
1340}
1341
1342// This lets you call back to a given function pointer if ptr is invalid.
1343// It is used primarily by windows code which wants a specialized callback.
1344ALWAYS_INLINE void* do_realloc_with_callback(
1345 void* old_ptr, size_t new_size,
1346 void (*invalid_free_fn)(void*),
1347 size_t (*invalid_get_size_fn)(const void*)) {
1348 // Get the size of the old entry
1349 const size_t old_size = GetSizeWithCallback(old_ptr, invalid_get_size_fn);
1350
1351 // Reallocate if the new size is larger than the old size,
1352 // or if the new size is significantly smaller than the old size.
1353 // We do hysteresis to avoid resizing ping-pongs:
1354 // . If we need to grow, grow to max(new_size, old_size * 1.X)
1355 // . Don't shrink unless new_size < old_size * 0.Y
1356 // X and Y trade-off time for wasted space. For now we do 1.25 and 0.5.
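// A worked example of the hysteresis (editorial sketch): with
// old_size == 1000, lower_bound_to_grow == 1250 and
// upper_bound_to_shrink == 500, so
//   realloc(p, 1100)  reallocates, first trying to grab 1250 bytes;
//   realloc(p, 600)   returns p unchanged (no shrink);
//   realloc(p, 400)   reallocates down to a 400-byte request.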
1357 const size_t lower_bound_to_grow = old_size + old_size / 4ul;
1358 const size_t upper_bound_to_shrink = old_size / 2ul;
1359 if ((new_size > old_size) || (new_size < upper_bound_to_shrink)) {
1360 // Need to reallocate.
1361 void* new_ptr = NULL;
1362
1363 if (new_size > old_size && new_size < lower_bound_to_grow) {
1364 new_ptr = do_malloc_or_cpp_alloc(lower_bound_to_grow);
1365 }
1366 if (new_ptr == NULL) {
1367 // Either new_size is not a tiny increment, or last do_malloc failed.
1368 new_ptr = do_malloc_or_cpp_alloc(new_size);
1369 }
1370 if (UNLIKELY(new_ptr == NULL)) {
1371 return NULL;
1372 }
1373 MallocHook::InvokeNewHook(new_ptr, new_size);
1374 memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size));
1375 MallocHook::InvokeDeleteHook(old_ptr);
1376 // We could use a variant of do_free() that leverages the fact
1377 // that we already know the sizeclass of old_ptr. The benefit
1378 // would be small, so don't bother.
1379 do_free_with_callback(old_ptr, invalid_free_fn);
1380 return new_ptr;
1381 } else {
1382 // We still need to call hooks to report the updated size:
1383 MallocHook::InvokeDeleteHook(old_ptr);
1384 MallocHook::InvokeNewHook(old_ptr, new_size);
1385 return old_ptr;
1386 }
1387}
1388
1389ALWAYS_INLINE void* do_realloc(void* old_ptr, size_t new_size) {
1390 return do_realloc_with_callback(old_ptr, new_size,
1391 &InvalidFree, &InvalidGetSizeForRealloc);
1392}
1393
1394// For use by exported routines below that want specific alignments
1395//
1396// Note: this code can be slow for alignments > 16, and can
1397// significantly fragment memory. The expectation is that
1398// memalign/posix_memalign/valloc/pvalloc will not be invoked very
1399// often. This requirement simplifies our implementation and allows
1400// us to tune for expected allocation patterns.
1401void* do_memalign(size_t align, size_t size) {
1402 ASSERT((align & (align - 1)) == 0);
1403 ASSERT(align > 0);
1404 if (size + align < size) return NULL; // Overflow
1405
1406 // Fall back to malloc if we would already align this memory access properly.
1407 if (align <= AlignmentForSize(size)) {
1408 void* p = do_malloc(size);
1409 ASSERT((reinterpret_cast<uintptr_t>(p) % align) == 0);
1410 return p;
1411 }
1412
1413 if (UNLIKELY(Static::pageheap() == NULL)) ThreadCache::InitModule();
1414
1415 // Allocate at least one byte to avoid boundary conditions below
1416 if (size == 0) size = 1;
1417
1418 if (size <= kMaxSize && align < kPageSize) {
1419 // Search through acceptable size classes looking for one with
1420 // enough alignment. This depends on the fact that
1421 // InitSizeClasses() currently produces several size classes that
1422 // are aligned at powers of two. We will waste time and space if
1423 // we miss in the size class array, but that is deemed acceptable
1424 // since memalign() should be used rarely.
1425 int cl = Static::sizemap()->SizeClass(size);
1426 while (cl < kNumClasses &&
1427 ((Static::sizemap()->class_to_size(cl) & (align - 1)) != 0)) {
1428 cl++;
1429 }
1430 if (cl < kNumClasses) {
1431 ThreadCache* heap = ThreadCache::GetCache();
1432 size = Static::sizemap()->class_to_size(cl);
1433 return CheckedMallocResult(heap->Allocate(size, cl));
1434 }
1435 }
1436
1437 // We will allocate directly from the page heap
1438 SpinLockHolder h(Static::pageheap_lock());
1439
1440 if (align <= kPageSize) {
1441 // Any page-level allocation will be fine
1442 // TODO: We could put the rest of this page in the appropriate
1443 // TODO: cache but it does not seem worth it.
1444 Span* span = Static::pageheap()->New(tcmalloc::pages(size));
1445 return UNLIKELY(span == NULL) ? NULL : SpanToMallocResult(span);
1446 }
1447
1448 // Allocate extra pages and carve off an aligned portion
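  // For example (editorial sketch, assuming the common 8 KiB pages, i.e.
  // kPageShift == 13): tc_memalign(32 << 10, 8 << 10) allocates
  // pages(8 KiB + 32 KiB) == 5 pages, skips at most 3 leading pages to reach
  // a 32 KiB boundary, and returns the skipped prefix and the unused suffix
  // to the page heap via Split()/Delete() below.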
1449 const Length alloc = tcmalloc::pages(size + align);
1450 Span* span = Static::pageheap()->New(alloc);
1451 if (UNLIKELY(span == NULL)) return NULL;
1452
1453 // Skip starting portion so that we end up aligned
1454 Length skip = 0;
1455 while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) {
1456 skip++;
1457 }
1458 ASSERT(skip < alloc);
1459 if (skip > 0) {
1460 Span* rest = Static::pageheap()->Split(span, skip);
1461 Static::pageheap()->Delete(span);
1462 span = rest;
1463 }
1464
1465 // Skip trailing portion that we do not need to return
1466 const Length needed = tcmalloc::pages(size);
1467 ASSERT(span->length >= needed);
1468 if (span->length > needed) {
1469 Span* trailer = Static::pageheap()->Split(span, needed);
1470 Static::pageheap()->Delete(trailer);
1471 }
1472 return SpanToMallocResult(span);
1473}
1474
1475// Helpers for use by exported routines below:
1476
1477inline void do_malloc_stats() {
1478 PrintStats(1);
1479}
1480
1481inline int do_mallopt(int cmd, int value) {
1482 return 1; // Indicates error
1483}
1484
1485#ifdef HAVE_STRUCT_MALLINFO
1486inline struct mallinfo do_mallinfo() {
1487 TCMallocStats stats;
1488 ExtractStats(&stats, NULL, NULL, NULL);
1489
1490 // Just some of the fields are filled in.
1491 struct mallinfo info;
1492 memset(&info, 0, sizeof(info));
1493
1494 // Unfortunately, the struct contains "int" fields, so some of the
1495 // size values will be truncated.
1496 info.arena = static_cast<int>(stats.pageheap.system_bytes);
1497 info.fsmblks = static_cast<int>(stats.thread_bytes
1498 + stats.central_bytes
1499 + stats.transfer_bytes);
1500 info.fordblks = static_cast<int>(stats.pageheap.free_bytes +
1501 stats.pageheap.unmapped_bytes);
1502 info.uordblks = static_cast<int>(stats.pageheap.system_bytes
1503 - stats.thread_bytes
1504 - stats.central_bytes
1505 - stats.transfer_bytes
1506 - stats.pageheap.free_bytes
1507 - stats.pageheap.unmapped_bytes);
1508
1509 return info;
1510}
1511#endif // HAVE_STRUCT_MALLINFO
1512
inline void* cpp_alloc(size_t size, bool nothrow) {
  void* p = do_malloc(size);
  if (LIKELY(p)) {
    return p;
  }
  return handle_oom(retry_malloc, reinterpret_cast<void *>(size),
                    true, nothrow);
}

}  // end unnamed namespace

// As promised, the definition of this function, declared above.
size_t TCMallocImplementation::GetAllocatedSize(const void* ptr) {
  if (ptr == NULL)
    return 0;
  ASSERT(TCMallocImplementation::GetOwnership(ptr)
         != TCMallocImplementation::kNotOwned);
  return GetSizeWithCallback(ptr, &InvalidGetAllocatedSize);
}

void TCMallocImplementation::MarkThreadBusy() {
  // Allocate to force the creation of a thread cache, but avoid
  // invoking any hooks.
  do_free(do_malloc(0));
}

//-------------------------------------------------------------------
// Exported routines
//-------------------------------------------------------------------

extern "C" PERFTOOLS_DLL_DECL const char* tc_version(
    int* major, int* minor, const char** patch) __THROW {
  if (major) *major = TC_VERSION_MAJOR;
  if (minor) *minor = TC_VERSION_MINOR;
  if (patch) *patch = TC_VERSION_PATCH;
  return TC_VERSION_STRING;
}

// This function behaves similarly to MSVC's _set_new_mode.
// If flag is 0 (default), calls to malloc will behave normally.
// If flag is 1, calls to malloc will behave like calls to new,
// and the std_new_handler will be invoked on failure.
// Returns the previous mode.
extern "C" PERFTOOLS_DLL_DECL int tc_set_new_mode(int flag) __THROW {
  int old_mode = tc_new_mode;
  tc_new_mode = flag;
  return old_mode;
}

#ifndef TCMALLOC_USING_DEBUGALLOCATION  // debugallocation.cc defines its own

// CAVEAT: The code structure below ensures that MallocHook methods are always
//         called from the stack frame of the invoked allocation function.
//         heap-checker.cc depends on this to start a stack trace from
//         the call to the (de)allocation function.

extern "C" PERFTOOLS_DLL_DECL void* tc_malloc(size_t size) __THROW {
  void* result = do_malloc_or_cpp_alloc(size);
  MallocHook::InvokeNewHook(result, size);
  return result;
}

extern "C" PERFTOOLS_DLL_DECL void tc_free(void* ptr) __THROW {
  MallocHook::InvokeDeleteHook(ptr);
  do_free(ptr);
}

extern "C" PERFTOOLS_DLL_DECL void* tc_calloc(size_t n,
                                              size_t elem_size) __THROW {
  void* result = do_calloc(n, elem_size);
  MallocHook::InvokeNewHook(result, n * elem_size);
  return result;
}

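// cfree() is an obsolete alias for free(); treat it identically.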
extern "C" PERFTOOLS_DLL_DECL void tc_cfree(void* ptr) __THROW {
  MallocHook::InvokeDeleteHook(ptr);
  do_free(ptr);
}

extern "C" PERFTOOLS_DLL_DECL void* tc_realloc(void* old_ptr,
                                               size_t new_size) __THROW {
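  // realloc(NULL, n) behaves like malloc(n); realloc(p, 0) frees p and
  // returns NULL.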
  if (old_ptr == NULL) {
    void* result = do_malloc_or_cpp_alloc(new_size);
    MallocHook::InvokeNewHook(result, new_size);
    return result;
  }
  if (new_size == 0) {
    MallocHook::InvokeDeleteHook(old_ptr);
    do_free(old_ptr);
    return NULL;
  }
  return do_realloc(old_ptr, new_size);
}

extern "C" PERFTOOLS_DLL_DECL void* tc_new(size_t size) {
  void* p = cpp_alloc(size, false);
  // We keep this next instruction out of cpp_alloc for a reason: when
  // it's in, and new just calls cpp_alloc, the optimizer may fold the
  // new call into cpp_alloc, which messes up our whole section-based
  // stacktracing (see ATTRIBUTE_SECTION, above).  This ensures cpp_alloc
  // isn't the last thing this fn calls, and prevents the folding.
  MallocHook::InvokeNewHook(p, size);
  return p;
}

extern "C" PERFTOOLS_DLL_DECL void* tc_new_nothrow(size_t size, const std::nothrow_t&) __THROW {
  void* p = cpp_alloc(size, true);
  MallocHook::InvokeNewHook(p, size);
  return p;
}

extern "C" PERFTOOLS_DLL_DECL void tc_delete(void* p) __THROW {
  MallocHook::InvokeDeleteHook(p);
  do_free(p);
}

// Standard C++ library implementations define and use this
// (via ::operator delete(ptr, nothrow)).
// But it's really the same as normal delete, so we just do the same thing.
extern "C" PERFTOOLS_DLL_DECL void tc_delete_nothrow(void* p, const std::nothrow_t&) __THROW {
  MallocHook::InvokeDeleteHook(p);
  do_free(p);
}

extern "C" PERFTOOLS_DLL_DECL void* tc_newarray(size_t size) {
  void* p = cpp_alloc(size, false);
  // We keep this next instruction out of cpp_alloc for a reason: when
  // it's in, and new just calls cpp_alloc, the optimizer may fold the
  // new call into cpp_alloc, which messes up our whole section-based
  // stacktracing (see ATTRIBUTE_SECTION, above).  This ensures cpp_alloc
  // isn't the last thing this fn calls, and prevents the folding.
  MallocHook::InvokeNewHook(p, size);
  return p;
}

extern "C" PERFTOOLS_DLL_DECL void* tc_newarray_nothrow(size_t size, const std::nothrow_t&)
    __THROW {
  void* p = cpp_alloc(size, true);
  MallocHook::InvokeNewHook(p, size);
  return p;
}

extern "C" PERFTOOLS_DLL_DECL void tc_deletearray(void* p) __THROW {
  MallocHook::InvokeDeleteHook(p);
  do_free(p);
}

extern "C" PERFTOOLS_DLL_DECL void tc_deletearray_nothrow(void* p, const std::nothrow_t&) __THROW {
  MallocHook::InvokeDeleteHook(p);
  do_free(p);
}

extern "C" PERFTOOLS_DLL_DECL void* tc_memalign(size_t align,
                                                size_t size) __THROW {
  void* result = do_memalign_or_cpp_memalign(align, size);
  MallocHook::InvokeNewHook(result, size);
  return result;
}

extern "C" PERFTOOLS_DLL_DECL int tc_posix_memalign(
    void** result_ptr, size_t align, size_t size) __THROW {
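  // POSIX requires the alignment to be a non-zero power of two and a
  // multiple of sizeof(void*); anything else is EINVAL.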
  if (((align % sizeof(void*)) != 0) ||
      ((align & (align - 1)) != 0) ||
      (align == 0)) {
    return EINVAL;
  }

  void* result = do_memalign_or_cpp_memalign(align, size);
  MallocHook::InvokeNewHook(result, size);
  if (UNLIKELY(result == NULL)) {
    return ENOMEM;
  } else {
    *result_ptr = result;
    return 0;
  }
}

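// Cached result of getpagesize(); filled in lazily by tc_valloc() and
// tc_pvalloc().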
static size_t pagesize = 0;

extern "C" PERFTOOLS_DLL_DECL void* tc_valloc(size_t size) __THROW {
  // Allocate page-aligned object of length >= size bytes
  if (pagesize == 0) pagesize = getpagesize();
  void* result = do_memalign_or_cpp_memalign(pagesize, size);
  MallocHook::InvokeNewHook(result, size);
  return result;
}

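// pvalloc() is an obsolete extension: like valloc(), but the size is first
// rounded up to a whole number of pages.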
extern "C" PERFTOOLS_DLL_DECL void* tc_pvalloc(size_t size) __THROW {
  // Round up size to a multiple of pagesize
  if (pagesize == 0) pagesize = getpagesize();
  if (size == 0) {     // pvalloc(0) should allocate one page, according to
    size = pagesize;   // http://man.free4web.biz/man3/libmpatrol.3.html
  }
  size = (size + pagesize - 1) & ~(pagesize - 1);
  void* result = do_memalign_or_cpp_memalign(pagesize, size);
  MallocHook::InvokeNewHook(result, size);
  return result;
}

extern "C" PERFTOOLS_DLL_DECL void tc_malloc_stats(void) __THROW {
  do_malloc_stats();
}

extern "C" PERFTOOLS_DLL_DECL int tc_mallopt(int cmd, int value) __THROW {
  return do_mallopt(cmd, value);
}

#ifdef HAVE_STRUCT_MALLINFO
extern "C" PERFTOOLS_DLL_DECL struct mallinfo tc_mallinfo(void) __THROW {
  return do_mallinfo();
}
#endif

extern "C" PERFTOOLS_DLL_DECL size_t tc_malloc_size(void* ptr) __THROW {
  return MallocExtension::instance()->GetAllocatedSize(ptr);
}

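// Like tc_malloc(), but allocation failure never invokes the C++ new-handler:
// do_malloc() is called directly rather than do_malloc_or_cpp_alloc().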
extern "C" PERFTOOLS_DLL_DECL void* tc_malloc_skip_new_handler(size_t size) __THROW {
  void* result = do_malloc(size);
  MallocHook::InvokeNewHook(result, size);
  return result;
}

#endif  // TCMALLOC_USING_DEBUGALLOCATION