summaryrefslogtreecommitdiff
path: root/absl/base/internal/cycleclock.h
diff options
context:
space:
mode:
authorGravatar Abseil Team <absl-team@google.com>2022-03-14 08:43:59 -0700
committerGravatar Andy Getz <durandal@google.com>2022-03-14 12:14:03 -0400
commit5ed77665c4d697f657fa1362b5a482450f858cdc (patch)
tree85ce8fd99e4e0418b1d48c14611dc4b0a1e7ec73 /absl/base/internal/cycleclock.h
parentc5a424a2a21005660b182516eb7a079cd8021699 (diff)
Export of internal Abseil changes
-- d3b99682554d339c42556680f4d65f83226005e2 by Martijn Vels <mvels@google.com>: Inline CycleClock code and remove branch for x86 CycleClockSource function This CL removes the relaxed load for x86 as there is no acquire price to pay on x86. It inlines the UnscaledCycleClock::Now() which is a single RDTSC op for x86, and likewise inlines CycleClock::Now() for x86. The inlining should mostly have secondary benefits such as reducing spills on outlined calls. LTO may eventually hoist these functions inline for the hotspots, but it doesn't hurt to default inline this for all builds and let the compiler decide on the first pass. The perflab benchmark is noisy for the plain BM_Now, but the other benchmarks and the run on my local machine are clear. ------------- Local Benchy Benchmark name old cpu/op new cpu/op delta BM_Now 3.41ns ± 1% 2.30ns ± 2% -32.52% (p=0.000 n=50+50) BM_NowWithRegisterPresure 4.96ns ± 2% 4.19ns ± 2% -15.57% (p=0.000 n=56+55) BM_NowWithCallback 3.30ns ± 2% 1.91ns ± 2% -42.00% (p=0.000 n=47+60) ------------- Perflab Benchy Benchmark name old cpu/op new cpu/op delta BM_Now 8.20ns ±13% 4.32ns ±83% ~ (p=0.413 n=4+5) BM_NowWithRegisterPresure 7.91ns ± 1% 3.68ns ± 2% -53.45% (p=0.029 n=4+4) BM_NowWithCallback 2.66ns ±13% 1.58ns ± 0% -40.51% (p=0.008 n=5+5) PiperOrigin-RevId: 434474766 Change-Id: I991d987ae9233e50f09606c874055cf4c5a56300 -- b38330686a0af176a2679163e4d2fa1b90e2f667 by Laramie Leavitt <lar@google.com>: Style, comment, and test updates * Remove a redundant assert in uniform_real_distribution. * Update comment in internal/generate_real.h * Style updates to uniform_real_distribution_test mainly replacing TypeParam with real_type, using aliases for some limits, etc. * Add a few more minor tests. PiperOrigin-RevId: 433902174 Change-Id: Id75be8e24be2fb8f6aea05feec13e3ef320a7254 -- ab2da6047ff7f5dae3add3779fcddf73b03feabf by Abseil Team <absl-team@google.com>: Remove declaration of method whose definition was previously removed. 
PiperOrigin-RevId: 433507828 Change-Id: I0130b689813125250f7de2664e767e181f676c89 -- df0c87f4ec2c010691931c1bef9d26470a6e63a2 by Derek Mauro <dmauro@google.com>: Internal change PiperOrigin-RevId: 433289136 Change-Id: Iba157dc83ed99dafd17a2223d2504e49f8afbb9e -- 7445fa312f2995772900eda82467325b3401a17d by Martijn Vels <mvels@google.com>: Optimize CordReader logic now that CONCAT is removed This CL cleans up various helper functions and logic remaining from previous complex CONCAT logic that is no longer needed, simplifying the CordReader logic. PiperOrigin-RevId: 433208748 Change-Id: I5f7b1883573c44e7c6f8af12c3cddbd197cb134d GitOrigin-RevId: d3b99682554d339c42556680f4d65f83226005e2
Diffstat (limited to 'absl/base/internal/cycleclock.h')
-rw-r--r--absl/base/internal/cycleclock.h69
1 files changed, 67 insertions, 2 deletions
diff --git a/absl/base/internal/cycleclock.h b/absl/base/internal/cycleclock.h
index a18b5844..9704e388 100644
--- a/absl/base/internal/cycleclock.h
+++ b/absl/base/internal/cycleclock.h
@@ -42,14 +42,19 @@
#ifndef ABSL_BASE_INTERNAL_CYCLECLOCK_H_
#define ABSL_BASE_INTERNAL_CYCLECLOCK_H_
+#include <atomic>
#include <cstdint>
+#include "absl/base/attributes.h"
#include "absl/base/config.h"
+#include "absl/base/internal/unscaledcycleclock.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace base_internal {
+using CycleClockSourceFunc = int64_t (*)();
+
// -----------------------------------------------------------------------------
// CycleClock
// -----------------------------------------------------------------------------
@@ -68,12 +73,37 @@ class CycleClock {
static double Frequency();
private:
+#if ABSL_USE_UNSCALED_CYCLECLOCK
+ static CycleClockSourceFunc LoadCycleClockSource();
+
+#ifdef NDEBUG
+#ifdef ABSL_INTERNAL_UNSCALED_CYCLECLOCK_FREQUENCY_IS_CPU_FREQUENCY
+ // Not debug mode and the UnscaledCycleClock frequency is the CPU
+ // frequency. Scale the CycleClock to prevent overflow if someone
+ // tries to represent the time as cycles since the Unix epoch.
+ static constexpr int32_t kShift = 1;
+#else
+ // Not debug mode and the UnscaledCycleClock isn't operating at the
+ // raw CPU frequency. There is no need to do any scaling, so don't
+ // needlessly sacrifice precision.
+ static constexpr int32_t kShift = 0;
+#endif
+#else // NDEBUG
+ // In debug mode use a different shift to discourage depending on a
+ // particular shift value.
+ static constexpr int32_t kShift = 2;
+#endif // NDEBUG
+
+ static constexpr double kFrequencyScale = 1.0 / (1 << kShift);
+ ABSL_CONST_INIT static std::atomic<CycleClockSourceFunc> cycle_clock_source_;
+#endif // ABSL_USE_UNSCALED_CYCLECLOCK
+
CycleClock() = delete; // no instances
CycleClock(const CycleClock&) = delete;
CycleClock& operator=(const CycleClock&) = delete;
-};
-using CycleClockSourceFunc = int64_t (*)();
+ friend class CycleClockSource;
+};
class CycleClockSource {
private:
@@ -87,6 +117,41 @@ class CycleClockSource {
static void Register(CycleClockSourceFunc source);
};
+#if ABSL_USE_UNSCALED_CYCLECLOCK
+
+inline CycleClockSourceFunc CycleClock::LoadCycleClockSource() {
+#if !defined(__x86_64__)
+ // Optimize for the common case (no callback) by first doing a relaxed load;
+ // this is significantly faster on non-x86 platforms.
+ if (cycle_clock_source_.load(std::memory_order_relaxed) == nullptr) {
+ return nullptr;
+ }
+#endif // !defined(__x86_64__)
+
+ // This corresponds to the store(std::memory_order_release) in
+ // CycleClockSource::Register, and makes sure that any updates made prior to
+ // registering the callback are visible to this thread before the callback
+ // is invoked.
+ return cycle_clock_source_.load(std::memory_order_acquire);
+}
+
+// Accessing globals in inlined code in Windows DLLs is problematic.
+#ifndef _WIN32
+inline int64_t CycleClock::Now() {
+ auto fn = LoadCycleClockSource();
+ if (fn == nullptr) {
+ return base_internal::UnscaledCycleClock::Now() >> kShift;
+ }
+ return fn() >> kShift;
+}
+#endif
+
+inline double CycleClock::Frequency() {
+ return kFrequencyScale * base_internal::UnscaledCycleClock::Frequency();
+}
+
+#endif // ABSL_USE_UNSCALED_CYCLECLOCK
+
} // namespace base_internal
ABSL_NAMESPACE_END
} // namespace absl