blob: 0a281c55e377bbf5b9095629694810f36bbc8229 [file] [log] [blame]
Austin Schuh745610d2015-09-06 18:19:50 -07001// -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
2// Copyright (c) 2008, Google Inc.
3// All rights reserved.
Brian Silverman20350ac2021-11-17 18:19:55 -08004//
Austin Schuh745610d2015-09-06 18:19:50 -07005// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
Brian Silverman20350ac2021-11-17 18:19:55 -08008//
Austin Schuh745610d2015-09-06 18:19:50 -07009// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
Brian Silverman20350ac2021-11-17 18:19:55 -080018//
Austin Schuh745610d2015-09-06 18:19:50 -070019// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// ---
32// All Rights Reserved.
33//
34// Author: Daniel Ford
35
36#ifndef TCMALLOC_SAMPLER_H_
37#define TCMALLOC_SAMPLER_H_
38
39#include "config.h"
40#include <stddef.h> // for size_t
41#ifdef HAVE_STDINT_H
42#include <stdint.h> // for uint64_t, uint32_t, int32_t
43#endif
44#include <string.h> // for memcpy
45#include "base/basictypes.h" // for ASSERT
46#include "internal_logging.h" // for ASSERT
Brian Silverman20350ac2021-11-17 18:19:55 -080047#include "static_vars.h"
Austin Schuh745610d2015-09-06 18:19:50 -070048
49namespace tcmalloc {
50
51//-------------------------------------------------------------------
52// Sampler to decide when to create a sample trace for an allocation
// Not thread safe: Each thread should have its own sampler object.
54// Caller must use external synchronization if used
55// from multiple threads.
56//
57// With 512K average sample step (the default):
58// the probability of sampling a 4K allocation is about 0.00778
59// the probability of sampling a 1MB allocation is about 0.865
60// the probability of sampling a 1GB allocation is about 1.00000
// In general, the probability of sampling an allocation of size X
// given a flag value of Y (default 512K) is:
63// 1 - e^(-X/Y)
64//
65// With 128K average sample step:
66// the probability of sampling a 1MB allocation is about 0.99966
67// the probability of sampling a 1GB allocation is about 1.0
68// (about 1 - 2**(-26))
69// With 1M average sample step:
70// the probability of sampling a 4K allocation is about 0.00390
71// the probability of sampling a 1MB allocation is about 0.632
72// the probability of sampling a 1GB allocation is about 1.0
73//
74// The sampler works by representing memory as a long stream from
75// which allocations are taken. Some of the bytes in this stream are
76// marked and if an allocation includes a marked byte then it is
77// sampled. Bytes are marked according to a Poisson point process
78// with each byte being marked independently with probability
79// p = 1/tcmalloc_sample_parameter. This makes the probability
80// of sampling an allocation of X bytes equal to the CDF of
81// a geometric with mean tcmalloc_sample_parameter. (ie. the
82// probability that at least one byte in the range is marked). This
83// is accurately given by the CDF of the corresponding exponential
Brian Silverman20350ac2021-11-17 18:19:55 -080084// distribution : 1 - e^(-X/tcmalloc_sample_parameter_)
Austin Schuh745610d2015-09-06 18:19:50 -070085// Independence of the byte marking ensures independence of
86// the sampling of each allocation.
87//
88// This scheme is implemented by noting that, starting from any
89// fixed place, the number of bytes until the next marked byte
90// is geometrically distributed. This number is recorded as
91// bytes_until_sample_. Every allocation subtracts from this
92// number until it is less than 0. When this happens the current
93// allocation is sampled.
94//
95// When an allocation occurs, bytes_until_sample_ is reset to
// a new independently sampled geometric number of bytes. The
97// memoryless property of the point process means that this may
98// be taken as the number of bytes after the end of the current
99// allocation until the next marked byte. This ensures that
100// very large allocations which would intersect many marked bytes
101// only result in a single call to PickNextSamplingPoint.
102//-------------------------------------------------------------------
103
Brian Silverman20350ac2021-11-17 18:19:55 -0800104class SamplerTest;
105
Austin Schuh745610d2015-09-06 18:19:50 -0700106class PERFTOOLS_DLL_DECL Sampler {
107 public:
Brian Silverman20350ac2021-11-17 18:19:55 -0800108 constexpr Sampler() {}
Austin Schuh745610d2015-09-06 18:19:50 -0700109
Brian Silverman20350ac2021-11-17 18:19:55 -0800110 // Initialize this sampler.
111 void Init(uint64_t seed);
112
113 // Record allocation of "k" bytes. Return true if no further work
114 // is need, and false if allocation needed to be sampled.
115 bool RecordAllocation(size_t k);
116
117 // Same as above (but faster), except:
118 // a) REQUIRES(k < std::numeric_limits<ssize_t>::max())
119 // b) if this returns false, you must call RecordAllocation
120 // to confirm if sampling truly needed.
121 //
122 // The point of this function is to only deal with common case of no
123 // sampling and let caller (which is in malloc fast-path) to
124 // "escalate" to fuller and slower logic only if necessary.
125 bool TryRecordAllocationFast(size_t k);
Austin Schuh745610d2015-09-06 18:19:50 -0700126
127 // Generate a geometric with mean 512K (or FLAG_tcmalloc_sample_parameter)
Brian Silverman20350ac2021-11-17 18:19:55 -0800128 ssize_t PickNextSamplingPoint();
Austin Schuh745610d2015-09-06 18:19:50 -0700129
130 // Returns the current sample period
Brian Silverman20350ac2021-11-17 18:19:55 -0800131 static int GetSamplePeriod();
Austin Schuh745610d2015-09-06 18:19:50 -0700132
133 // The following are public for the purposes of testing
134 static uint64_t NextRandom(uint64_t rnd_); // Returns the next prng value
Austin Schuh745610d2015-09-06 18:19:50 -0700135
Brian Silverman20350ac2021-11-17 18:19:55 -0800136 // C++03 requires that types stored in TLS be POD. As a result, you must
137 // initialize these members to {0, 0, false} before using this class!
138 //
139 // TODO(ahh): C++11 support will let us make these private.
140
141 // Bytes until we sample next.
142 //
143 // More specifically when bytes_until_sample_ is X, we can allocate
144 // X bytes without triggering sampling; on the (X+1)th allocated
145 // byte, the containing allocation will be sampled.
146 //
147 // Always non-negative with only very brief exceptions (see
148 // DecrementFast{,Finish}, so casting to size_t is ok.
Austin Schuh745610d2015-09-06 18:19:50 -0700149 private:
Brian Silverman20350ac2021-11-17 18:19:55 -0800150 friend class SamplerTest;
151 bool RecordAllocationSlow(size_t k);
Austin Schuh745610d2015-09-06 18:19:50 -0700152
Brian Silverman20350ac2021-11-17 18:19:55 -0800153 ssize_t bytes_until_sample_{};
154 uint64_t rnd_{}; // Cheap random number generator
155 bool initialized_{};
Austin Schuh745610d2015-09-06 18:19:50 -0700156};
157
Brian Silverman20350ac2021-11-17 18:19:55 -0800158inline bool Sampler::RecordAllocation(size_t k) {
159 // The first time we enter this function we expect bytes_until_sample_
160 // to be zero, and we must call SampleAllocationSlow() to ensure
161 // proper initialization of static vars.
162 ASSERT(Static::IsInited() || bytes_until_sample_ == 0);
163
164 // Note that we have to deal with arbitrarily large values of k
165 // here. Thus we're upcasting bytes_until_sample_ to unsigned rather
166 // than the other way around. And this is why this code cannot be
167 // merged with DecrementFast code below.
168 if (static_cast<size_t>(bytes_until_sample_) < k) {
169 bool result = RecordAllocationSlow(k);
170 ASSERT(Static::IsInited());
171 return result;
Austin Schuh745610d2015-09-06 18:19:50 -0700172 } else {
173 bytes_until_sample_ -= k;
Brian Silverman20350ac2021-11-17 18:19:55 -0800174 ASSERT(Static::IsInited());
175 return true;
176 }
177}
178
179inline bool Sampler::TryRecordAllocationFast(size_t k) {
180 // For efficiency reason, we're testing bytes_until_sample_ after
181 // decrementing it by k. This allows compiler to do sub <reg>, <mem>
182 // followed by conditional jump on sign. But it is correct only if k
183 // is actually smaller than largest ssize_t value. Otherwise
184 // converting k to signed value overflows.
185 //
186 // It would be great for generated code to be sub <reg>, <mem>
187 // followed by conditional jump on 'carry', which would work for
188 // arbitrary values of k, but there seem to be no way to express
189 // that in C++.
190 //
191 // Our API contract explicitly states that only small values of k
192 // are permitted. And thus it makes sense to assert on that.
193 ASSERT(static_cast<ssize_t>(k) >= 0);
194
195 bytes_until_sample_ -= static_cast<ssize_t>(k);
196 if (PREDICT_FALSE(bytes_until_sample_ < 0)) {
197 // Note, we undo sampling counter update, since we're not actually
198 // handling slow path in the "needs sampling" case (calling
199 // RecordAllocationSlow to reset counter). And we do that in order
200 // to avoid non-tail calls in malloc fast-path. See also comments
201 // on declaration inside Sampler class.
202 //
203 // volatile is used here to improve compiler's choice of
204 // instuctions. We know that this path is very rare and that there
205 // is no need to keep previous value of bytes_until_sample_ in
206 // register. This helps compiler generate slightly more efficient
207 // sub <reg>, <mem> instruction for subtraction above.
208 volatile ssize_t *ptr =
209 const_cast<volatile ssize_t *>(&bytes_until_sample_);
James Kuszmaul9776b392023-01-14 14:08:08 -0800210 *ptr = *ptr + k;
Austin Schuh745610d2015-09-06 18:19:50 -0700211 return false;
212 }
Brian Silverman20350ac2021-11-17 18:19:55 -0800213 return true;
Austin Schuh745610d2015-09-06 18:19:50 -0700214}
215
216// Inline functions which are public for testing purposes
217
218// Returns the next prng value.
219// pRNG is: aX+b mod c with a = 0x5DEECE66D, b = 0xB, c = 1<<48
220// This is the lrand64 generator.
221inline uint64_t Sampler::NextRandom(uint64_t rnd) {
Brian Silverman20350ac2021-11-17 18:19:55 -0800222 const uint64_t prng_mult = 0x5DEECE66DULL;
Austin Schuh745610d2015-09-06 18:19:50 -0700223 const uint64_t prng_add = 0xB;
224 const uint64_t prng_mod_power = 48;
225 const uint64_t prng_mod_mask =
Brian Silverman20350ac2021-11-17 18:19:55 -0800226 ~((~static_cast<uint64_t>(0)) << prng_mod_power);
Austin Schuh745610d2015-09-06 18:19:50 -0700227 return (prng_mult * rnd + prng_add) & prng_mod_mask;
228}
229
Austin Schuh745610d2015-09-06 18:19:50 -0700230} // namespace tcmalloc
231
232#endif // TCMALLOC_SAMPLER_H_