diff options
author | Abseil Team <absl-team@google.com> | 2019-06-21 13:11:42 -0700 |
---|---|---|
committer | Gennadiy Rozental <rogeeff@google.com> | 2019-06-21 16:18:10 -0400 |
commit | e9324d926a9189e222741fce6e676f0944661a72 (patch) | |
tree | a08568a709940c376454da34c9d8aac021378e5f /absl/random/internal/nanobenchmark.h | |
parent | 43ef2148c0936ebf7cb4be6b19927a9d9d145b8f (diff) |
Export of internal Abseil changes.
--
7a6ff16a85beb730c172d5d25cf1b5e1be885c56 by Laramie Leavitt <lar@google.com>:
Internal change.
PiperOrigin-RevId: 254454546
--
ff8f9bafaefc26d451f576ea4a06d150aed63f6f by Andy Soffer <asoffer@google.com>:
Internal changes
PiperOrigin-RevId: 254451562
--
deefc5b651b479ce36f0b4ef203e119c0c8936f2 by CJ Johnson <johnsoncj@google.com>:
Account for subtracting unsigned values from the size of InlinedVector
PiperOrigin-RevId: 254450625
--
3c677316a27bcadc17e41957c809ca472d5fef14 by Andy Soffer <asoffer@google.com>:
Add C++17's std::make_from_tuple to absl/utility/utility.h
PiperOrigin-RevId: 254411573
--
4ee3536a918830eeec402a28fc31a62c7c90b940 by CJ Johnson <johnsoncj@google.com>:
Adds benchmark for the rest of the InlinedVector public API
PiperOrigin-RevId: 254408378
--
e5a21a00700ee83498ff1efbf649169756463ee4 by CJ Johnson <johnsoncj@google.com>:
Updates the definition of InlinedVector::shrink_to_fit() to be exception safe and adds exception safety tests for it.
PiperOrigin-RevId: 254401387
--
2ea82e72b86d82d78b4e4712a63a55981b53c64b by Laramie Leavitt <lar@google.com>:
Use absl::InsecureBitGen in place of std::mt19937
in tests absl/random/...distribution_test.cc
PiperOrigin-RevId: 254289444
--
fa099e02c413a7ffda732415e8105cad26a90337 by Andy Soffer <asoffer@google.com>:
Internal changes
PiperOrigin-RevId: 254286334
--
ce34b7f36933b30cfa35b9c9a5697a792b5666e4 by Andy Soffer <asoffer@google.com>:
Internal changes
PiperOrigin-RevId: 254273059
--
6f9c473da7c2090c2e85a37c5f00622e8a912a89 by Jorg Brown <jorg@google.com>:
Change absl::container_internal::CompressedTuple to instantiate its
internal Storage class with the name of the type it's holding, rather
than the name of the Tuple. This is not an externally-visible change,
other than less compiler memory is used and less debug information is
generated.
PiperOrigin-RevId: 254269285
--
8bd3c186bf2fc0c55d8a2dd6f28a5327502c9fba by Andy Soffer <asoffer@google.com>:
Adding short-hand IntervalClosed for IntervalClosedClosed and IntervalOpen for
IntervalOpenOpen.
PiperOrigin-RevId: 254252419
--
ea957f99b6a04fccd42aa05605605f3b44b1ecfd by Abseil Team <absl-team@google.com>:
Do not directly use __SIZEOF_INT128__.
In order to avoid linker errors when building with clang-cl (__fixunsdfti, __udivti3 and __fixunssfti are undefined), this CL uses ABSL_HAVE_INTRINSIC_INT128 which is not defined for clang-cl.
PiperOrigin-RevId: 254250739
--
89ab385cd26b34d64130bce856253aaba96d2345 by Andy Soffer <asoffer@google.com>:
Internal changes
PiperOrigin-RevId: 254242321
--
cffc793d93eca6d6bdf7de733847b6ab4a255ae9 by CJ Johnson <johnsoncj@google.com>:
Adds benchmark for InlinedVector::reserve(size_type)
PiperOrigin-RevId: 254199226
--
c90c7a9fa3c8f0c9d5114036979548b055ea2f2a by Gennadiy Rozental <rogeeff@google.com>:
Import of CCTZ from GitHub.
PiperOrigin-RevId: 254072387
--
c4c388beae016c9570ab54ffa1d52660e4a85b7b by Laramie Leavitt <lar@google.com>:
Internal cleanup.
PiperOrigin-RevId: 254062381
--
d3c992e221cc74e5372d0c8fa410170b6a43c062 by Tom Manshreck <shreck@google.com>:
Update distributions.h to Abseil standards
PiperOrigin-RevId: 254054946
--
d15ad0035c34ef11b14fadc5a4a2d3ec415f5518 by CJ Johnson <johnsoncj@google.com>:
Removes functions with only one caller from the implementation details of InlinedVector by manually inlining the definitions
PiperOrigin-RevId: 254005427
--
2f37e807efc3a8ef1f4b539bdd379917d4151520 by Andy Soffer <asoffer@google.com>:
Initial release of Abseil Random
PiperOrigin-RevId: 253999861
--
24ed1694b6430791d781ed533a8f8ccf6cac5856 by CJ Johnson <johnsoncj@google.com>:
Updates the definition of InlinedVector::assign(...)/InlinedVector::operator=(...) to new, exception-safe implementations with exception safety tests to boot
PiperOrigin-RevId: 253993691
--
5613d95f5a7e34a535cfaeadce801441e990843e by CJ Johnson <johnsoncj@google.com>:
Adds benchmarks for InlinedVector::shrink_to_fit()
PiperOrigin-RevId: 253989647
--
2a96ddfdac40bbb8cb6a7f1aeab90917067c6e63 by Abseil Team <absl-team@google.com>:
Initial release of Abseil Random
PiperOrigin-RevId: 253927497
--
bf1aff8fc9ffa921ad74643e9525ecf25b0d8dc1 by Andy Soffer <asoffer@google.com>:
Initial release of Abseil Random
PiperOrigin-RevId: 253920512
--
bfc03f4a3dcda3cf3a4b84bdb84cda24e3394f41 by Laramie Leavitt <lar@google.com>:
Internal change.
PiperOrigin-RevId: 253886486
--
05036cfcc078ca7c5f581a00dfb0daed568cbb69 by Eric Fiselier <ericwf@google.com>:
Don't include `winsock2.h` because it drags in `windows.h` and friends,
and they define awful macros like OPAQUE, ERROR, and more. This has the
potential to break abseil users.
Instead we only forward declare `timeval` and require that Windows users
include `winsock2.h` themselves. This is both inconsistent and poor QoI, but
including `windows.h` is bad too.
PiperOrigin-RevId: 253852615
GitOrigin-RevId: 7a6ff16a85beb730c172d5d25cf1b5e1be885c56
Change-Id: Icd6aff87da26f29ec8915da856f051129987cef6
Diffstat (limited to 'absl/random/internal/nanobenchmark.h')
-rw-r--r-- | absl/random/internal/nanobenchmark.h | 168 |
1 files changed, 168 insertions, 0 deletions
// Copyright 2017 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ABSL_RANDOM_INTERNAL_NANOBENCHMARK_H_
#define ABSL_RANDOM_INTERNAL_NANOBENCHMARK_H_

// Benchmarks functions of a single integer argument with realistic branch
// prediction hit rates. Uses a robust estimator to summarize the measurements.
// The precision is about 0.2%.
//
// Examples: see nanobenchmark_test.cc.
//
// Background: Microbenchmarks such as http://github.com/google/benchmark
// can measure elapsed times on the order of a microsecond. Shorter functions
// are typically measured by repeating them thousands of times and dividing
// the total elapsed time by this count. Unfortunately, repetition (especially
// with the same input parameter!) influences the runtime. In time-critical
// code, it is reasonable to expect warm instruction/data caches and TLBs,
// but a perfect record of which branches will be taken is unrealistic.
// Unless the application also repeatedly invokes the measured function with
// the same parameter, the benchmark is measuring something very different -
// a best-case result, almost as if the parameter were made a compile-time
// constant. This may lead to erroneous conclusions about branch-heavy
// algorithms outperforming branch-free alternatives.
//
// Our approach differs in three ways. Adding fences to the timer functions
// reduces variability due to instruction reordering, improving the timer
// resolution to about 40 CPU cycles. However, shorter functions must still
// be invoked repeatedly. For more realistic branch prediction performance,
// we vary the input parameter according to a user-specified distribution.
// Thus, instead of VaryInputs(Measure(Repeat(func))), we change the
// loop nesting to Measure(Repeat(VaryInputs(func))). We also estimate the
// central tendency of the measurement samples with the "half sample mode",
// which is more robust to outliers and skewed data than the mean or median.

// NOTE: for compatibility with multiple translation units compiled with
// distinct flags, avoid #including headers that define functions.

#include <stddef.h>
#include <stdint.h>

namespace absl {
namespace random_internal_nanobenchmark {

// Input influencing the function being measured (e.g. number of bytes to copy).
using FuncInput = size_t;

// "Proof of work" returned by Func to ensure the compiler does not elide it.
using FuncOutput = uint64_t;

// Function to measure: either 1) a captureless lambda or function with two
// arguments or 2) a lambda with capture, in which case the first argument
// is reserved for use by MeasureClosure.
using Func = FuncOutput (*)(const void*, FuncInput);

// Internal parameters that determine precision/resolution/measuring time.
struct Params {
  // For measuring timer overhead/resolution. Used in a nested loop =>
  // quadratic time, acceptable because we know timer overhead is "low".
  // constexpr because this is used to define array bounds.
  static constexpr size_t kTimerSamples = 256;

  // Best-case precision, expressed as a divisor of the timer resolution.
  // Larger => more calls to Func and higher precision.
  size_t precision_divisor = 1024;

  // Ratio between full and subset input distribution sizes. Cannot be less
  // than 2; larger values increase measurement time but more faithfully
  // model the given input distribution.
  size_t subset_ratio = 2;

  // Together with the estimated Func duration, determines how many times to
  // call Func before checking the sample variability. Larger values increase
  // measurement time, memory/cache use and precision.
  double seconds_per_eval = 4E-3;

  // The minimum number of samples before estimating the central tendency.
  size_t min_samples_per_eval = 7;

  // The mode is better than median for estimating the central tendency of
  // skewed/fat-tailed distributions, but it requires sufficient samples
  // relative to the width of half-ranges.
  size_t min_mode_samples = 64;

  // Maximum permissible variability (= median absolute deviation / center).
  double target_rel_mad = 0.002;

  // Abort after this many evals without reaching target_rel_mad. This
  // prevents infinite loops.
  size_t max_evals = 9;

  // Retry the measure loop up to this many times.
  size_t max_measure_retries = 2;

  // Whether to print additional statistics to stdout.
  bool verbose = true;
};

// Measurement result for each unique input.
struct Result {
  FuncInput input;

  // Robust estimate (mode or median) of duration.
  float ticks;

  // Measure of variability (median absolute deviation relative to "ticks").
  float variability;
};

// Ensures the thread is running on the specified cpu, and no others.
// Reduces noise due to desynchronized socket RDTSC and context switches.
// If "cpu" is negative, pin to the currently running core.
void PinThreadToCPU(const int cpu = -1);

// Returns tick rate, useful for converting measurements to seconds. Invariant
// means the tick counter frequency is independent of CPU throttling or sleep.
// This call may be expensive, callers should cache the result.
double InvariantTicksPerSecond();

// Precisely measures the number of ticks elapsed when calling "func" with the
// given inputs, shuffled to ensure realistic branch prediction hit rates.
//
// "func" returns a 'proof of work' to ensure its computations are not elided.
// "arg" is passed to Func, or reserved for internal use by MeasureClosure.
// "inputs" is an array of "num_inputs" (not necessarily unique) arguments to
//   "func". The values should be chosen to maximize coverage of "func". This
//   represents a distribution, so a value's frequency should reflect its
//   probability in the real application. Order does not matter; for example, a
//   uniform distribution over [0, 4) could be represented as {3,0,2,1}.
// Returns how many Result were written to "results": one per unique input, or
//   zero if the measurement failed (an error message goes to stderr).
size_t Measure(const Func func, const void* arg, const FuncInput* inputs,
               const size_t num_inputs, Result* results,
               const Params& p = Params());

// Calls operator() of the given closure (lambda function).
//
// "f" must point to an object of type Closure; MeasureClosure passes the
// address of its "closure" argument here via the "arg" slot of Func.
// Returns whatever the closure returns for "input".
template <class Closure>
static FuncOutput CallClosure(const void* f, const FuncInput input) {
  // static_cast is the appropriate (and sufficient) cast for converting a
  // void pointer back to the object type it originally pointed to.
  return (*static_cast<const Closure*>(f))(input);
}

// Same as Measure, except "closure" is typically a lambda function of
// FuncInput -> FuncOutput with a capture list.
//
// The address of "closure" is forwarded to Measure as "arg", and
// CallClosure<Closure> is forwarded as "func"; the remaining arguments and
// the return value are those of Measure.
template <class Closure>
static inline size_t MeasureClosure(const Closure& closure,
                                    const FuncInput* inputs,
                                    const size_t num_inputs, Result* results,
                                    const Params& p = Params()) {
  // CallClosure<Closure> already has exactly the type Func, so it is passed
  // without a cast: any future signature mismatch becomes a compile error
  // instead of being silently hidden by reinterpret_cast.
  const Func trampoline = &CallClosure<Closure>;
  // Object pointers convert to const void* implicitly.
  const void* const closure_arg = &closure;
  return Measure(trampoline, closure_arg, inputs, num_inputs, results, p);
}

}  // namespace random_internal_nanobenchmark
}  // namespace absl

#endif  // ABSL_RANDOM_INTERNAL_NANOBENCHMARK_H_