// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "absl/time/clock.h"

#include "absl/base/attributes.h"

#ifdef _WIN32
#include <windows.h>
#endif

#include <algorithm>
#include <atomic>
#include <cerrno>
#include <cstdint>
#include <ctime>
#include <limits>

#include "absl/base/internal/spinlock.h"
#include "absl/base/internal/unscaledcycleclock.h"
#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/base/thread_annotations.h"

namespace absl {
ABSL_NAMESPACE_BEGIN
Time Now() {
  // TODO(bww): Get a timespec instead so we don't have to divide.
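  // Note: the low-order argument of MakeDuration() counts ticks of a quarter
  // nanosecond (4 ticks per nanosecond in Duration's representation), which is
  // why the nanosecond remainder below is multiplied by 4.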
  int64_t n = absl::GetCurrentTimeNanos();
  if (n >= 0) {
    return time_internal::FromUnixDuration(
        time_internal::MakeDuration(n / 1000000000, n % 1000000000 * 4));
  }
  return time_internal::FromUnixDuration(absl::Nanoseconds(n));
}
ABSL_NAMESPACE_END
}  // namespace absl

// Decide if we should use the fast GetCurrentTimeNanos() algorithm
// based on the cyclecounter, otherwise just get the time directly
// from the OS on every call. This can be chosen at compile-time via
// -DABSL_USE_CYCLECLOCK_FOR_GET_CURRENT_TIME_NANOS=[0|1]
#ifndef ABSL_USE_CYCLECLOCK_FOR_GET_CURRENT_TIME_NANOS
#if ABSL_USE_UNSCALED_CYCLECLOCK
#define ABSL_USE_CYCLECLOCK_FOR_GET_CURRENT_TIME_NANOS 1
#else
#define ABSL_USE_CYCLECLOCK_FOR_GET_CURRENT_TIME_NANOS 0
#endif
#endif

#if defined(__APPLE__) || defined(_WIN32)
#include "absl/time/internal/get_current_time_chrono.inc"
#else
#include "absl/time/internal/get_current_time_posix.inc"
#endif

// Allows override by test.
#ifndef GET_CURRENT_TIME_NANOS_FROM_SYSTEM
#define GET_CURRENT_TIME_NANOS_FROM_SYSTEM() \
  ::absl::time_internal::GetCurrentTimeNanosFromSystem()
#endif

#if !ABSL_USE_CYCLECLOCK_FOR_GET_CURRENT_TIME_NANOS
namespace absl {
ABSL_NAMESPACE_BEGIN
int64_t GetCurrentTimeNanos() { return GET_CURRENT_TIME_NANOS_FROM_SYSTEM(); }
ABSL_NAMESPACE_END
}  // namespace absl
#else  // Use the cyclecounter-based implementation below.

// Allows override by test.
#ifndef GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW
#define GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW() \
  ::absl::time_internal::UnscaledCycleClockWrapperForGetCurrentTime::Now()
#endif

// The following counters are used only by the test code.
static int64_t stats_initializations;
static int64_t stats_reinitializations;
static int64_t stats_calibrations;
static int64_t stats_slow_paths;
static int64_t stats_fast_slow_paths;

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace time_internal {
// This is a friend wrapper around UnscaledCycleClock::Now()
// (needed to access UnscaledCycleClock).
class UnscaledCycleClockWrapperForGetCurrentTime {
 public:
  static int64_t Now() { return base_internal::UnscaledCycleClock::Now(); }
};
}  // namespace time_internal

// uint64_t is used in this module to provide an extra bit in multiplications

// Return the time in ns as told by the kernel interface. Place in *cycleclock
// the value of the cycleclock at about the time of the syscall.
// This call represents the time base that this module synchronizes to.
// Ensures that *cycleclock does not step back by up to (1 << 16) from
// last_cycleclock, to discard small backward counter steps. (Larger steps are
// assumed to be complete resyncs, which shouldn't happen. If they do, a full
// reinitialization of the outer algorithm should occur.)
static int64_t GetCurrentTimeNanosFromKernel(uint64_t last_cycleclock,
                                             uint64_t *cycleclock) {
  // We try to read clock values at about the same time as the kernel clock.
  // This value gets adjusted up or down as an estimate of how long that should
  // take, so we can reject attempts that take unusually long.
  static std::atomic<uint64_t> approx_syscall_time_in_cycles{10 * 1000};

  uint64_t local_approx_syscall_time_in_cycles =  // local copy
      approx_syscall_time_in_cycles.load(std::memory_order_relaxed);

  int64_t current_time_nanos_from_system;
  uint64_t before_cycles;
  uint64_t after_cycles;
  uint64_t elapsed_cycles;
  int loops = 0;
  do {
    before_cycles = GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW();
    current_time_nanos_from_system = GET_CURRENT_TIME_NANOS_FROM_SYSTEM();
    after_cycles = GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW();
    // elapsed_cycles is unsigned, so is large on overflow
    elapsed_cycles = after_cycles - before_cycles;
    if (elapsed_cycles >= local_approx_syscall_time_in_cycles &&
        ++loops == 20) {  // clock changed frequencies? Back off.
      loops = 0;
      if (local_approx_syscall_time_in_cycles < 1000 * 1000) {
        local_approx_syscall_time_in_cycles =
            (local_approx_syscall_time_in_cycles + 1) << 1;
      }
      approx_syscall_time_in_cycles.store(
          local_approx_syscall_time_in_cycles,
          std::memory_order_relaxed);
    }
  } while (elapsed_cycles >= local_approx_syscall_time_in_cycles ||
           last_cycleclock - after_cycles < (static_cast<uint64_t>(1) << 16));

  // Number of times in a row we've seen a kernel time call take substantially
  // less than approx_syscall_time_in_cycles.
  static std::atomic<uint32_t> seen_smaller{ 0 };

  // Adjust approx_syscall_time_in_cycles to be within a factor of 2
  // of the typical time to execute one iteration of the loop above.
  if ((local_approx_syscall_time_in_cycles >> 1) < elapsed_cycles) {
    // measured time is no smaller than half current approximation
    seen_smaller.store(0, std::memory_order_relaxed);
  } else if (seen_smaller.fetch_add(1, std::memory_order_relaxed) >= 3) {
    // smaller delays several times in a row; reduce approximation by 12.5%
    const uint64_t new_approximation =
        local_approx_syscall_time_in_cycles -
        (local_approx_syscall_time_in_cycles >> 3);
    approx_syscall_time_in_cycles.store(new_approximation,
                                        std::memory_order_relaxed);
    seen_smaller.store(0, std::memory_order_relaxed);
  }

  *cycleclock = after_cycles;
  return current_time_nanos_from_system;
}


// ---------------------------------------------------------------------
// An implementation of reader-writer locks that use no atomic ops in the read
// case. This is a generalization of Lamport's method for reading a multiword
// clock. Increment a word on each write acquisition, using the low-order bit
// as a spinlock; the word is the high word of the "clock". Readers read the
// high word, then all other data, then the high word again, and repeat the
// read if the reads of the high words yield different answers, or an odd
// value (either case suggests possible interference from a writer).
// Here we use a spinlock to ensure only one writer at a time, rather than
// spinning on the bottom bit of the word to benefit from SpinLock
// spin-delay tuning.
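//
// The generic reader protocol described above, in outline (a sketch only, not
// compiled; GetCurrentTimeNanos() below implements a variant that falls
// through to a slow path instead of retrying):
//   uint64_t s0, s1;
//   do {
//     s0 = seq.load(std::memory_order_acquire);
//     ... read the shared words with relaxed loads ...
//     std::atomic_thread_fence(std::memory_order_acquire);
//     s1 = seq.load(std::memory_order_relaxed);
//   } while (s0 != s1 || (s0 & 1) != 0);  // retry on writer interference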

// Acquire seqlock (*seq) and return the value to be written to unlock.
static inline uint64_t SeqAcquire(std::atomic<uint64_t> *seq) {
  uint64_t x = seq->fetch_add(1, std::memory_order_relaxed);

  // We put a release fence between update to *seq and writes to shared data.
  // Thus all stores to shared data are effectively release operations and
  // update to *seq above cannot be re-ordered past any of them. Note that
  // this barrier is not for the fetch_add above. A release barrier for the
  // fetch_add would be before it, not after.
  std::atomic_thread_fence(std::memory_order_release);

  return x + 2;  // original word plus 2
}

// Release seqlock (*seq) by writing x to it---a value previously returned by
// SeqAcquire.
static inline void SeqRelease(std::atomic<uint64_t> *seq, uint64_t x) {
  // The unlock store to *seq must have release ordering so that all
  // updates to shared data must finish before this store.
  seq->store(x, std::memory_order_release);  // release lock for readers
}
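
// Writer usage, in outline (this is the pattern UpdateLastSample() below
// follows):
//   uint64_t lock_value = SeqAcquire(&seq);  // *seq is now odd; readers defer
//   ... store the shared data with relaxed stores ...
//   SeqRelease(&seq, lock_value);            // *seq even again; data published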

// ---------------------------------------------------------------------

// "nsscaled" is a unit of time equal to a (2**kScale)th of a nanosecond.
enum { kScale = 30 };
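//
// For example (illustrative numbers only): a 3 GHz cycle counter has a period
// of 1/3 ns, so its nsscaled period is about (1 << kScale) / 3 ~= 357913941,
// and elapsed ns is recovered as (delta_cycles * nsscaled_per_cycle) >> kScale.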

// The minimum interval between samples of the time base.
// We pick enough time to amortize the cost of the sample,
// to get a reasonably accurate cycle counter rate reading,
// and not so much that calculations will overflow 64-bits.
static const uint64_t kMinNSBetweenSamples = 2000 << 20;
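// (2000 << 20 is 2000 * 2^20 ns, i.e. roughly 2.1 seconds between samples.)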

// We require that kMinNSBetweenSamples shifted by kScale
// have at least a bit left over for 64-bit calculations.
static_assert(((kMinNSBetweenSamples << (kScale + 1)) >> (kScale + 1)) ==
              kMinNSBetweenSamples,
              "cannot represent kMinNSBetweenSamples shifted by kScale + 1");

// A reader-writer lock protecting the static locations below.
// See SeqAcquire() and SeqRelease() above.
ABSL_CONST_INIT static absl::base_internal::SpinLock lock(
    absl::kConstInit, base_internal::SCHEDULE_KERNEL_ONLY);
ABSL_CONST_INIT static std::atomic<uint64_t> seq(0);

// data from a sample of the kernel's time value
struct TimeSampleAtomic {
  std::atomic<uint64_t> raw_ns;              // raw kernel time
  std::atomic<uint64_t> base_ns;             // our estimate of time
  std::atomic<uint64_t> base_cycles;         // cycle counter reading
  std::atomic<uint64_t> nsscaled_per_cycle;  // cycle period
  // cycles before we'll sample again (a scaled reciprocal of the period,
  // to avoid a division on the fast path).
  std::atomic<uint64_t> min_cycles_per_sample;
};
// Same again, but with non-atomic types
struct TimeSample {
  uint64_t raw_ns;                 // raw kernel time
  uint64_t base_ns;                // our estimate of time
  uint64_t base_cycles;            // cycle counter reading
  uint64_t nsscaled_per_cycle;     // cycle period
  uint64_t min_cycles_per_sample;  // approx cycles before next sample
};

static struct TimeSampleAtomic last_sample;  // the last sample; under seq

static int64_t GetCurrentTimeNanosSlowPath() ABSL_ATTRIBUTE_COLD;

// Read the contents of *atomic into *sample.
// Each field is read atomically, but to maintain atomicity between fields,
// the access must be done under a lock.
static void ReadTimeSampleAtomic(const struct TimeSampleAtomic *atomic,
                                 struct TimeSample *sample) {
  sample->base_ns = atomic->base_ns.load(std::memory_order_relaxed);
  sample->base_cycles = atomic->base_cycles.load(std::memory_order_relaxed);
  sample->nsscaled_per_cycle =
      atomic->nsscaled_per_cycle.load(std::memory_order_relaxed);
  sample->min_cycles_per_sample =
      atomic->min_cycles_per_sample.load(std::memory_order_relaxed);
  sample->raw_ns = atomic->raw_ns.load(std::memory_order_relaxed);
}

// Public routine.
// Algorithm: We wish to compute real time from a cycle counter. In normal
// operation, we construct a piecewise linear approximation to the kernel time
// source, using the cycle counter value. The start of each line segment is at
// the same point as the end of the last, but may have a different slope (that
// is, a different idea of the cycle counter frequency). Every couple of
// seconds, the kernel time source is sampled and compared with the current
// approximation. A new slope is chosen that, if followed for another couple
// of seconds, will correct the error at the current position. The information
// for a sample is in the "last_sample" struct. The linear approximation is
//   estimated_time = last_sample.base_ns +
//     last_sample.ns_per_cycle * (counter_reading - last_sample.base_cycles)
// (ns_per_cycle is actually stored in different units and scaled, to avoid
// overflow). The base_ns of the next linear approximation is the
// estimated_time using the last approximation; the base_cycles is the cycle
// counter value at that time; the ns_per_cycle is the number of ns per cycle
// measured since the last sample, but adjusted so that most of the difference
// between the estimated_time and the kernel time will be corrected by the
// estimated time to the next sample. In normal operation, this algorithm
// relies on:
// - the cycle counter and kernel time rates not changing a lot in a few
//   seconds.
// - the client calling into the code often compared to a couple of seconds, so
//   the time to the next correction can be estimated.
// Any time ns_per_cycle is not known, a major error is detected, or the
// assumption about frequent calls is violated, the implementation returns the
// kernel time. It records sufficient data that a linear approximation can
// resume a little later.
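//
// For example, if at a sample the estimate is found to lag the kernel clock by
// diff_ns, the new slope is chosen so that roughly 15/16 of that error (the
// full error damped by diff_ns/16) would be absorbed over the next
// kMinNSBetweenSamples interval; see UpdateLastSample() below.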

int64_t GetCurrentTimeNanos() {
  // read the data from the "last_sample" struct (but don't need raw_ns yet)
  // The reads of "seq" and test of the values emulate a reader lock.
  uint64_t base_ns;
  uint64_t base_cycles;
  uint64_t nsscaled_per_cycle;
  uint64_t min_cycles_per_sample;
  uint64_t seq_read0;
  uint64_t seq_read1;

  // If we have enough information to interpolate, the value returned will be
  // derived from this cycleclock-derived time estimate. On some platforms
  // (POWER) the function to retrieve this value has enough complexity to
  // contribute to register pressure - reading it early before initializing
  // the other pieces of the calculation minimizes spill/restore instructions,
  // minimizing icache cost.
  uint64_t now_cycles = GET_CURRENT_TIME_NANOS_CYCLECLOCK_NOW();

  // Acquire pairs with the barrier in SeqRelease - if this load sees that
  // store, the shared-data reads necessarily see that SeqRelease's updates
  // to the same shared data.
  seq_read0 = seq.load(std::memory_order_acquire);

  base_ns = last_sample.base_ns.load(std::memory_order_relaxed);
  base_cycles = last_sample.base_cycles.load(std::memory_order_relaxed);
  nsscaled_per_cycle =
      last_sample.nsscaled_per_cycle.load(std::memory_order_relaxed);
  min_cycles_per_sample =
      last_sample.min_cycles_per_sample.load(std::memory_order_relaxed);

  // This acquire fence pairs with the release fence in SeqAcquire. Since it
  // is sequenced between reads of shared data and seq_read1, the reads of
  // shared data are effectively acquiring.
  std::atomic_thread_fence(std::memory_order_acquire);

  // The shared-data reads are effectively acquire ordered, and the
  // shared-data writes are effectively release ordered. Therefore if our
  // shared-data reads see any of a particular update's shared-data writes,
  // seq_read1 is guaranteed to see that update's SeqAcquire.
  seq_read1 = seq.load(std::memory_order_relaxed);

  // Fast path. Return if min_cycles_per_sample has not yet elapsed since the
  // last sample, and we read a consistent sample. The fast path activates
  // only when min_cycles_per_sample is non-zero, which happens when we get an
  // estimate for the cycle time. The predicate will fail if now_cycles <
  // base_cycles, or if some other thread is in the slow path.
  //
  // Since we now read now_cycles before base_ns, it is possible for now_cycles
  // to be less than base_cycles (if we were interrupted between those loads and
  // last_sample was updated). This is harmless, because delta_cycles will wrap
  // and report a time much much bigger than min_cycles_per_sample. In that case
  // we will take the slow path.
  uint64_t delta_cycles = now_cycles - base_cycles;
  if (seq_read0 == seq_read1 && (seq_read0 & 1) == 0 &&
      delta_cycles < min_cycles_per_sample) {
    return base_ns + ((delta_cycles * nsscaled_per_cycle) >> kScale);
  }
  return GetCurrentTimeNanosSlowPath();
}

// Return (a << kScale)/b.
// Zero is returned if b==0. Scaling is performed internally to
// preserve precision without overflow.
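// If (a << kScale) would overflow, a is shifted by a smaller amount and b is
// shifted down by the difference, which leaves the quotient approximately
// unchanged: (a << s) / (b >> (kScale - s)) ~= (a << kScale) / b.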
static uint64_t SafeDivideAndScale(uint64_t a, uint64_t b) {
  // Find maximum safe_shift so that
  //  0 <= safe_shift <= kScale and (a << safe_shift) does not overflow.
  int safe_shift = kScale;
  while (((a << safe_shift) >> safe_shift) != a) {
    safe_shift--;
  }
  uint64_t scaled_b = b >> (kScale - safe_shift);
  uint64_t quotient = 0;
  if (scaled_b != 0) {
    quotient = (a << safe_shift) / scaled_b;
  }
  return quotient;
}

static uint64_t UpdateLastSample(
    uint64_t now_cycles, uint64_t now_ns, uint64_t delta_cycles,
    const struct TimeSample *sample) ABSL_ATTRIBUTE_COLD;

// The slow path of GetCurrentTimeNanos(). This is taken while gathering
// initial samples, when enough time has elapsed since the last sample, and if
// any other thread is writing to last_sample.
//
// Manually mark this 'noinline' to minimize stack frame size of the fast
// path. Without this, sometimes a compiler may inline this big block of code
// into the fast path. That causes lots of register spills and reloads that
// are unnecessary unless the slow path is taken.
//
// TODO(absl-team): Remove this attribute when our compiler is smart enough
// to do the right thing.
ABSL_ATTRIBUTE_NOINLINE
static int64_t GetCurrentTimeNanosSlowPath() ABSL_LOCKS_EXCLUDED(lock) {
  // Serialize access to slow-path. Fast-path readers are not blocked yet, and
  // code below must not modify last_sample until the seqlock is acquired.
  lock.Lock();

  // Sample the kernel time base. This is the definition of
  // "now" if we take the slow path.
  static uint64_t last_now_cycles;  // protected by lock
  uint64_t now_cycles;
  uint64_t now_ns = GetCurrentTimeNanosFromKernel(last_now_cycles, &now_cycles);
  last_now_cycles = now_cycles;

  uint64_t estimated_base_ns;

  // ----------
  // Read the "last_sample" values again; this time holding the write lock.
  struct TimeSample sample;
  ReadTimeSampleAtomic(&last_sample, &sample);

  // ----------
  // Try running the fast path again; another thread may have updated the
  // sample between our run of the fast path and the sample we just read.
  uint64_t delta_cycles = now_cycles - sample.base_cycles;
  if (delta_cycles < sample.min_cycles_per_sample) {
    // Another thread updated the sample. This path does not take the seqlock
    // so that blocked readers can make progress without blocking new readers.
    estimated_base_ns = sample.base_ns +
        ((delta_cycles * sample.nsscaled_per_cycle) >> kScale);
    stats_fast_slow_paths++;
  } else {
    estimated_base_ns =
        UpdateLastSample(now_cycles, now_ns, delta_cycles, &sample);
  }

  lock.Unlock();

  return estimated_base_ns;
}

// Main part of the algorithm. Locks out readers, updates the approximation
// using the new sample from the kernel, and stores the result in last_sample
// for readers. Returns the new estimated time.
static uint64_t UpdateLastSample(uint64_t now_cycles, uint64_t now_ns,
                                 uint64_t delta_cycles,
                                 const struct TimeSample *sample)
    ABSL_EXCLUSIVE_LOCKS_REQUIRED(lock) {
  uint64_t estimated_base_ns = now_ns;
  uint64_t lock_value = SeqAcquire(&seq);  // acquire seqlock to block readers

  // The 5s in the next if-statement limits the time for which we will trust
  // the cycle counter and our last sample to give a reasonable result.
  // Errors in the rate of the source clock can be multiplied by the ratio
  // between this limit and kMinNSBetweenSamples.
  if (sample->raw_ns == 0 ||  // no recent sample, or clock went backwards
      sample->raw_ns + static_cast<uint64_t>(5) * 1000 * 1000 * 1000 < now_ns ||
      now_ns < sample->raw_ns || now_cycles < sample->base_cycles) {
    // record this sample, and forget any previously known slope.
    last_sample.raw_ns.store(now_ns, std::memory_order_relaxed);
    last_sample.base_ns.store(estimated_base_ns, std::memory_order_relaxed);
    last_sample.base_cycles.store(now_cycles, std::memory_order_relaxed);
    last_sample.nsscaled_per_cycle.store(0, std::memory_order_relaxed);
    last_sample.min_cycles_per_sample.store(0, std::memory_order_relaxed);
    stats_initializations++;
  } else if (sample->raw_ns + 500 * 1000 * 1000 < now_ns &&
             sample->base_cycles + 50 < now_cycles) {
    // Enough time has passed to compute the cycle time.
    if (sample->nsscaled_per_cycle != 0) {  // Have a cycle time estimate.
      // Compute time from the counter reading, but avoiding overflow;
      // delta_cycles may be larger than on the fast path.
      uint64_t estimated_scaled_ns;
      int s = -1;
      do {
        s++;
        estimated_scaled_ns = (delta_cycles >> s) * sample->nsscaled_per_cycle;
      } while (estimated_scaled_ns / sample->nsscaled_per_cycle !=
               (delta_cycles >> s));
      estimated_base_ns = sample->base_ns +
                          (estimated_scaled_ns >> (kScale - s));
    }

    // Compute the assumed cycle time kMinNSBetweenSamples ns into the future
    // assuming the cycle counter rate stays the same as the last interval.
    uint64_t ns = now_ns - sample->raw_ns;
    uint64_t measured_nsscaled_per_cycle = SafeDivideAndScale(ns, delta_cycles);

    uint64_t assumed_next_sample_delta_cycles =
        SafeDivideAndScale(kMinNSBetweenSamples, measured_nsscaled_per_cycle);

    int64_t diff_ns = now_ns - estimated_base_ns;  // estimate low by this much

    // We want to set nsscaled_per_cycle so that our estimate of the ns time
    // at the assumed cycle time is the assumed ns time.
    // That is, we want to set nsscaled_per_cycle so:
    //  kMinNSBetweenSamples + diff_ns ==
    //  (assumed_next_sample_delta_cycles * nsscaled_per_cycle) >> kScale
    // But we wish to damp oscillations, so instead correct only most
    // of our current error, by solving:
    //  kMinNSBetweenSamples + diff_ns - (diff_ns / 16) ==
    //  (assumed_next_sample_delta_cycles * nsscaled_per_cycle) >> kScale
    ns = kMinNSBetweenSamples + diff_ns - (diff_ns / 16);
    uint64_t new_nsscaled_per_cycle =
        SafeDivideAndScale(ns, assumed_next_sample_delta_cycles);
    if (new_nsscaled_per_cycle != 0 &&
        diff_ns < 100 * 1000 * 1000 && -diff_ns < 100 * 1000 * 1000) {
      // record the cycle time measurement
      last_sample.nsscaled_per_cycle.store(
          new_nsscaled_per_cycle, std::memory_order_relaxed);
      uint64_t new_min_cycles_per_sample =
          SafeDivideAndScale(kMinNSBetweenSamples, new_nsscaled_per_cycle);
      last_sample.min_cycles_per_sample.store(
          new_min_cycles_per_sample, std::memory_order_relaxed);
      stats_calibrations++;
    } else {  // something went wrong; forget the slope
      last_sample.nsscaled_per_cycle.store(0, std::memory_order_relaxed);
      last_sample.min_cycles_per_sample.store(0, std::memory_order_relaxed);
      estimated_base_ns = now_ns;
      stats_reinitializations++;
    }
    last_sample.raw_ns.store(now_ns, std::memory_order_relaxed);
    last_sample.base_ns.store(estimated_base_ns, std::memory_order_relaxed);
    last_sample.base_cycles.store(now_cycles, std::memory_order_relaxed);
  } else {
    // have a sample, but no slope; waiting for enough time for a calibration
    stats_slow_paths++;
  }

  SeqRelease(&seq, lock_value);  // release the readers

  return estimated_base_ns;
}
ABSL_NAMESPACE_END
}  // namespace absl
#endif  // ABSL_USE_CYCLECLOCK_FOR_GET_CURRENT_TIME_NANOS

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace {

// Returns the maximum duration that SleepOnce() can sleep for.
constexpr absl::Duration MaxSleep() {
#ifdef _WIN32
  // Windows Sleep() takes an unsigned long argument in milliseconds.
  return absl::Milliseconds(
      std::numeric_limits<unsigned long>::max());  // NOLINT(runtime/int)
#else
  return absl::Seconds(std::numeric_limits<time_t>::max());
#endif
}

// Sleeps for the given duration.
// REQUIRES: to_sleep <= MaxSleep().
void SleepOnce(absl::Duration to_sleep) {
#ifdef _WIN32
  Sleep(to_sleep / absl::Milliseconds(1));
#else
  struct timespec sleep_time = absl::ToTimespec(to_sleep);
  while (nanosleep(&sleep_time, &sleep_time) != 0 && errno == EINTR) {
    // Ignore signals and wait for the full interval to elapse.
  }
#endif
}

}  // namespace
ABSL_NAMESPACE_END
}  // namespace absl

extern "C" {

ABSL_ATTRIBUTE_WEAK void AbslInternalSleepFor(absl::Duration duration) {
  while (duration > absl::ZeroDuration()) {
    absl::Duration to_sleep = std::min(duration, absl::MaxSleep());
    absl::SleepOnce(to_sleep);
    duration -= to_sleep;
  }
}

}  // extern "C"