diff options
Diffstat (limited to 'tensorflow/core/lib/random')
18 files changed, 2113 insertions, 0 deletions
diff --git a/tensorflow/core/lib/random/distribution_sampler.cc b/tensorflow/core/lib/random/distribution_sampler.cc new file mode 100644 index 0000000000..341f1bd595 --- /dev/null +++ b/tensorflow/core/lib/random/distribution_sampler.cc @@ -0,0 +1,80 @@ +#include "tensorflow/core/lib/random/distribution_sampler.h" + +#include <memory> +#include <vector> + +namespace tensorflow { +namespace random { + +DistributionSampler::DistributionSampler( + const gtl::ArraySlice<float>& weights) { + DCHECK(!weights.empty()); + int n = weights.size(); + num_ = n; + data_.reset(new std::pair<float, int>[n]); + + std::unique_ptr<double[]> pr(new double[n]); + + double sum = 0.0; + for (int i = 0; i < n; i++) { + sum += weights[i]; + set_alt(i, -1); + } + + // These are long/short items - called high/low because of reserved keywords. + std::vector<int> high; + high.reserve(n); + std::vector<int> low; + low.reserve(n); + + // compute propotional weights + for (int i = 0; i < n; i++) { + double p = (weights[i] * n) / sum; + pr[i] = p; + if (p < 1.0) { + low.push_back(i); + } else { + high.push_back(i); + } + } + + // Now pair high with low. + while (!high.empty() && !low.empty()) { + int l = low.back(); + low.pop_back(); + int h = high.back(); + high.pop_back(); + + set_alt(l, h); + DCHECK_GE(pr[h], 1.0); + double remaining = pr[h] - (1.0 - pr[l]); + pr[h] = remaining; + + if (remaining < 1.0) { + low.push_back(h); + } else { + high.push_back(h); + } + } + // Transfer pr to prob with rounding errors. + for (int i = 0; i < n; i++) { + set_prob(i, pr[i]); + } + // Because of rounding errors, both high and low may have elements, that are + // close to 1.0 prob. + for (size_t i = 0; i < high.size(); i++) { + int idx = high[i]; + set_prob(idx, 1.0); + // set alt to self to prevent rounding errors returning 0 + set_alt(idx, idx); + } + for (size_t i = 0; i < low.size(); i++) { + int idx = low[i]; + set_prob(idx, 1.0); + // set alt to self to prevent rounding errors returning 0 + set_alt(idx, idx); + } +} + +} // namespace random +} // namespace tensorflow diff --git a/tensorflow/core/lib/random/distribution_sampler.h b/tensorflow/core/lib/random/distribution_sampler.h new file mode 100644 index 0000000000..ab9598a205 --- /dev/null +++ b/tensorflow/core/lib/random/distribution_sampler.h @@ -0,0 +1,79 @@ +// DistributionSampler allows generating a discrete random variable with a given +// distribution. +// The values taken by the variable are [0, N) and relative weights for each +// value are specified using a vector of size N. +// +// The Algorithm takes O(N) time to precompute data at construction time and +// takes O(1) time (2 random number generation, 2 lookups) for each sample. +// The data structure takes O(N) memory. +// +// In contrast, util/random/weighted-picker.h provides O(lg N) sampling. +// The advantage of that implementation is that weights can be adjusted +// dynamically, while DistributionSampler doesn't allow weight adjustment. +// +// The algorithm used is Walker's Aliasing algorithm, described in Knuth, Vol 2. + +#ifndef TENSORFLOW_LIB_RANDOM_DISTRIBUTION_SAMPLER_H_ +#define TENSORFLOW_LIB_RANDOM_DISTRIBUTION_SAMPLER_H_ + +#include <memory> +#include <utility> +#include <vector> + +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/lib/random/simple_philox.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/port.h" + +namespace tensorflow { +namespace random { + +class DistributionSampler { + public: + explicit DistributionSampler(const gtl::ArraySlice<float>& weights); + + ~DistributionSampler() {} + + int Sample(SimplePhilox* rand) const { + float r = rand->RandFloat(); + // Since n is typically low, we don't bother with UnbiasedUniform. + int idx = rand->Uniform(num_); + if (r < prob(idx)) return idx; + // else pick alt from that bucket. + DCHECK_NE(-1, alt(idx)); + return alt(idx); + } + + int num() const { return num_; } + + private: + float prob(int idx) const { + DCHECK_LT(idx, num_); + return data_[idx].first; + } + + int alt(int idx) const { + DCHECK_LT(idx, num_); + return data_[idx].second; + } + + void set_prob(int idx, float f) { + DCHECK_LT(idx, num_); + data_[idx].first = f; + } + + void set_alt(int idx, int val) { + DCHECK_LT(idx, num_); + data_[idx].second = val; + } + + int num_; + std::unique_ptr<std::pair<float, int>[]> data_; + + TF_DISALLOW_COPY_AND_ASSIGN(DistributionSampler); +}; + +} // namespace random +} // namespace tensorflow + +#endif // TENSORFLOW_LIB_RANDOM_DISTRIBUTION_SAMPLER_H_ diff --git a/tensorflow/core/lib/random/distribution_sampler_test.cc b/tensorflow/core/lib/random/distribution_sampler_test.cc new file mode 100644 index 0000000000..d61a8daa0f --- /dev/null +++ b/tensorflow/core/lib/random/distribution_sampler_test.cc @@ -0,0 +1,90 @@ +#include "tensorflow/core/lib/random/distribution_sampler.h" + +#include <string.h> +#include <memory> +#include <vector> + +#include "tensorflow/core/platform/port.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" +#include "tensorflow/core/lib/random/simple_philox.h" +#include <gtest/gtest.h> + +namespace tensorflow { +namespace random { + +class DistributionSamplerTest : public ::testing::Test { + protected: + // Returns the Chi-Squared statistic for the two distributions. + float TestWeights(const std::vector<float>& weights, int trials_per_bin) { + int iters = weights.size() * trials_per_bin; + std::unique_ptr<float[]> counts(new float[weights.size()]); + memset(counts.get(), 0, sizeof(float) * weights.size()); + DistributionSampler sampler(weights); + PhiloxRandom philox(testing::RandomSeed(), 17); + SimplePhilox random(&philox); + for (int i = 0; i < iters; i++) { + int r = sampler.Sample(&random); + EXPECT_LT(r, weights.size()); + EXPECT_GE(r, 0); + counts[r] += 1.0; + } + float chi2 = 0.0; + for (size_t i = 0; i < weights.size(); i++) { + counts[i] /= iters; + float err = (counts[i] - weights[i]); + chi2 += (err * err) / weights[i]; + } + return chi2; + } + + void TestDistribution(float* arr, int n) { + std::vector<float> w; + w.reserve(n); + for (int i = 0; i < n; i++) { + w.push_back(arr[i]); + } + float var = TestWeights(w, 1000); + if (var < 0.001) return; + // Maybe a statistical skew. Let's try more iterations. + var = TestWeights(w, 100000); + if (var < 0.001) return; + EXPECT_TRUE(false) << "Chi2 is " << var << " in " << n * 100000 + << "iterations"; + } +}; + +TEST_F(DistributionSamplerTest, KnownDistribution) { + float kEven2[] = {0.5, 0.5}; + float kEven3[] = {0.33333333, 0.33333333, 0.33333333}; + float kEven4[] = {0.25, 0.25, 0.25, 0.25}; + + float kDist1[] = {0.8, 0.15, 0.05}; + + TestDistribution(kEven2, TF_ARRAYSIZE(kEven2)); + TestDistribution(kEven3, TF_ARRAYSIZE(kEven3)); + TestDistribution(kEven4, TF_ARRAYSIZE(kEven4)); + TestDistribution(kDist1, TF_ARRAYSIZE(kDist1)); +} + +static void BM_DistributionSampler(int iters, int n) { + testing::StopTiming(); + PhiloxRandom philox(173, 371); + SimplePhilox rand(&philox); + std::vector<float> weights(n, 0); + for (int i = 0; i < n; i++) { + weights[i] = rand.Uniform(100); + } + DistributionSampler picker(weights); + testing::StartTiming(); + int r = 0; + for (int i = 0; i < iters; i++) { + r |= picker.Sample(&rand); + } + CHECK_NE(r, kint32max); +} + +BENCHMARK(BM_DistributionSampler)->Arg(10)->Arg(100)->Arg(1000); + +} // namespace random +} // namespace tensorflow diff --git a/tensorflow/core/lib/random/exact_uniform_int.h b/tensorflow/core/lib/random/exact_uniform_int.h new file mode 100644 index 0000000000..616354cc5c --- /dev/null +++ b/tensorflow/core/lib/random/exact_uniform_int.h @@ -0,0 +1,68 @@ +// Exact uniform integers using rejection sampling + +#ifndef TENSORFLOW_LIB_RANDOM_EXACT_UNIFORM_H_ +#define TENSORFLOW_LIB_RANDOM_EXACT_UNIFORM_H_ + +#include <type_traits> + +namespace tensorflow { +namespace random { + +template <typename UintType, typename RandomBits> +UintType ExactUniformInt(const UintType n, const RandomBits& random) { + static_assert(std::is_unsigned<UintType>::value, + "UintType must be an unsigned int"); + static_assert(std::is_same<UintType, decltype(random())>::value, + "random() should return UintType"); + if (n == 0) { + // Consume a value anyway + // TODO(irving): Assert n != 0, since this case makes no sense. + return random() * n; + } else if (0 == (n & (n - 1))) { + // N is a power of two, so just mask off the lower bits. + return random() & (n - 1); + } else { + // Reject all numbers that skew the distribution towards 0. + + // random's output is uniform in the half-open interval [0, 2^{bits}). + // For any interval [m,n), the number of elements in it is n-m. + + const UintType range = ~static_cast<UintType>(0); + const UintType rem = (range % n) + 1; + UintType rnd; + + // rem = ((2^bits-1) \bmod n) + 1 + // 1 <= rem <= n + + // NB: rem == n is impossible, since n is not a power of 2 (from + // earlier check). + + do { + rnd = random(); // rnd uniform over [0, 2^{bits}) + } while (rnd < rem); // reject [0, rem) + // rnd is uniform over [rem, 2^{bits}) + // + // The number of elements in the half-open interval is + // + // 2^{bits} - rem = 2^{bits} - ((2^{bits}-1) \bmod n) - 1 + // = 2^{bits}-1 - ((2^{bits}-1) \bmod n) + // = n \cdot \lfloor (2^{bits}-1)/n \rfloor + // + // therefore n evenly divides the number of integers in the + // interval. + // + // The function v \rightarrow v % n takes values from [bias, + // 2^{bits}) to [0, n). Each integer in the range interval [0, n) + // will have exactly \lfloor (2^{bits}-1)/n \rfloor preimages from + // the domain interval. + // + // Therefore, v % n is uniform over [0, n). QED. + + return rnd % n; + } +} + +} // namespace random +} // namespace tensorflow + +#endif // TENSORFLOW_LIB_RANDOM_EXACT_UNIFORM_H_ diff --git a/tensorflow/core/lib/random/philox_random.h b/tensorflow/core/lib/random/philox_random.h new file mode 100644 index 0000000000..2c3cd0c4b9 --- /dev/null +++ b/tensorflow/core/lib/random/philox_random.h @@ -0,0 +1,232 @@ +// Implement the Philox algorithm to generate random numbers in parallel. +// Salmon et al. SC 2011. Parallel random numbers: as easy as 1, 2, 3. +// http://www.thesalmons.org/john/random123/papers/random123sc11.pdf + +#ifndef TENSORFLOW_LIB_RANDOM_PHILOX_RANDOM_H_ +#define TENSORFLOW_LIB_RANDOM_PHILOX_RANDOM_H_ + +#include <stdlib.h> + +#include "tensorflow/core/platform/port.h" + +// Function qualifiers that need to work on both CPU and GPU. +#ifdef __CUDA_ARCH__ +// For nvcc. +#define PHILOX_DEVICE_FUNC __host__ __device__ +#define PHILOX_INLINE __inline__ +#else +// For non-nvcc. +#define PHILOX_DEVICE_FUNC +#define PHILOX_INLINE inline +#endif +#define PHILOX_DEVICE_INLINE PHILOX_DEVICE_FUNC PHILOX_INLINE + +#include <math.h> + +namespace tensorflow { +namespace random { + +// A class that represents an inline array. It can be used on both CPU and GPU, +// and also trivially copyable between CPU and GPU. +// Arguments: +// T: the array element type; +// ElementCount: the fixed size of the array; +template <typename T, int ElementCount> +class Array { + public: + PHILOX_DEVICE_INLINE Array() { + for (int i = 0; i < ElementCount; ++i) { + data_[i] = T(); + } + } + + PHILOX_DEVICE_INLINE const T& operator[](int index) const { + return data_[index]; + } + + PHILOX_DEVICE_INLINE T& operator[](int index) { return data_[index]; } + + size_t size() const { return ElementCount; } + + private: + T data_[ElementCount]; +}; + +// A class that encapsulates all the states for a random number generator using +// the philox_4x32_10 algorithm. Each invocation returns a 128-bit random bits +// in the form of four uint32. +// There are multiple variants of this algorithm, we picked the 4x32_10 version +// that is most suited for our applications. +// Since this class is meant to be copied between CPU to GPU, it maintains a +// value semantics. +// +// For example: To use this class and populate an array of 1024 randoms on CPU +// with two threads, +// +// void Fill(PhiloxRandom rnd, uint32* output, int start, int limit) { +// assert(start % 4 == 0); +// assert(limit % 4 == 0); +// rnd.Skip(start / 4); +// for (int i = start; i < limit; i += 4) { +// auto sample = rnd(); +// ... copy sample[0..3] to output[i..i+3] +// } +// } +// +// PhiloxRandom rng(seed); +// PhiloxRandom rng_copy = rng; +// rng.Skip(1000/4); +// +// ... schedule Fill(rng_copy, output, 0, 512) in thread 1; +// ... schedule Fill(rng_copy, output, 512, 1024) in thread 2; +// ... wait for thread 1 & 2 to finish executing Fill(). +// +// NOTE: +// 1. PhiloxRandom is trivially copyable. +// 2. PhiloxRandom is compilable by gcc and nvcc. +class PhiloxRandom { + public: + typedef Array<uint32, 4> ResultType; + typedef uint32 ResultElementType; + // The number of elements that will be returned. + static const int kResultElementCount = 4; + + PHILOX_DEVICE_INLINE + PhiloxRandom() {} + + PHILOX_DEVICE_INLINE + explicit PhiloxRandom(uint64 seed) { + key_[0] = static_cast<uint32>(seed); + key_[1] = static_cast<uint32>(seed >> 32); + } + + PHILOX_DEVICE_INLINE + explicit PhiloxRandom(uint64 seed_lo, uint64 seed_hi) { + key_[0] = static_cast<uint32>(seed_lo); + key_[1] = static_cast<uint32>(seed_lo >> 32); + counter_[2] = static_cast<uint32>(seed_hi); + counter_[3] = static_cast<uint32>(seed_hi >> 32); + } + + // Skip the specified number of samples of 128-bits in the current stream. + PHILOX_DEVICE_INLINE + void Skip(uint64 count) { + const uint32 count_lo = static_cast<uint32>(count); + uint32 count_hi = static_cast<uint32>(count >> 32); + + counter_[0] += count_lo; + if (counter_[0] < count_lo) { + ++count_hi; + } + + counter_[1] += count_hi; + if (counter_[1] < count_hi) { + if (++counter_[2] == 0) { + ++counter_[3]; + } + } + } + + // Returns a group of four random numbers using the underlying Philox + // algorithm. + PHILOX_DEVICE_INLINE ResultType operator()() { + ResultType counter = counter_; + Key key = key_; + + // Run the single rounds for ten times. Manually unrolling the loop + // for better performance. + counter = ComputeSingleRound(counter, key); + RaiseKey(&key); + counter = ComputeSingleRound(counter, key); + RaiseKey(&key); + counter = ComputeSingleRound(counter, key); + RaiseKey(&key); + counter = ComputeSingleRound(counter, key); + RaiseKey(&key); + counter = ComputeSingleRound(counter, key); + RaiseKey(&key); + counter = ComputeSingleRound(counter, key); + RaiseKey(&key); + counter = ComputeSingleRound(counter, key); + RaiseKey(&key); + counter = ComputeSingleRound(counter, key); + RaiseKey(&key); + counter = ComputeSingleRound(counter, key); + RaiseKey(&key); + counter = ComputeSingleRound(counter, key); + + SkipOne(); + + return counter; + } + + private: + // The type for the 64-bit key stored in the form of two 32-bit uint + // that are used in the diffusion process. + typedef Array<uint32, 2> Key; + + // We use the same constants as recommended by the original paper. + static const uint32 kPhiloxW32A = 0x9E3779B9; + static const uint32 kPhiloxW32B = 0xBB67AE85; + static const uint32 kPhiloxM4x32A = 0xD2511F53; + static const uint32 kPhiloxM4x32B = 0xCD9E8D57; + + // Helper function to skip the next sample of 128-bits in the current stream. + PHILOX_DEVICE_INLINE void SkipOne() { + if (++counter_[0] == 0) { + if (++counter_[1] == 0) { + if (++counter_[2] == 0) { + ++counter_[3]; + } + } + } + } + + // Helper function to return the lower and higher 32-bits from two 32-bit + // integer multiplications. + PHILOX_DEVICE_INLINE + static void MultiplyHighLow(uint32 a, uint32 b, uint32* result_low, + uint32* result_high) { +#ifndef __GCUDACC__ + const uint64 product = static_cast<uint64>(a) * b; + *result_low = static_cast<uint32>(product); + *result_high = static_cast<uint32>(product >> 32); +#else + *result_low = a * b; + *result_high = __umulhi(a, b); +#endif + } + + // Helper function for a single round of the underlying Philox algorithm. + PHILOX_DEVICE_INLINE static ResultType ComputeSingleRound( + const ResultType& counter, const Key& key) { + uint32 lo0; + uint32 hi0; + MultiplyHighLow(kPhiloxM4x32A, counter[0], &lo0, &hi0); + + uint32 lo1; + uint32 hi1; + MultiplyHighLow(kPhiloxM4x32B, counter[2], &lo1, &hi1); + + ResultType result; + result[0] = hi1 ^ counter[1] ^ key[0]; + result[1] = lo1; + result[2] = hi0 ^ counter[3] ^ key[1]; + result[3] = lo0; + return result; + } + + PHILOX_DEVICE_INLINE void RaiseKey(Key* key) { + (*key)[0] += kPhiloxW32A; + (*key)[1] += kPhiloxW32B; + } + + private: + ResultType counter_; + Key key_; +}; + +} // namespace random +} // namespace tensorflow + +#endif // TENSORFLOW_LIB_RANDOM_PHILOX_RANDOM_H_ diff --git a/tensorflow/core/lib/random/philox_random_test.cc b/tensorflow/core/lib/random/philox_random_test.cc new file mode 100644 index 0000000000..997c0263b7 --- /dev/null +++ b/tensorflow/core/lib/random/philox_random_test.cc @@ -0,0 +1,58 @@ +#include "tensorflow/core/lib/random/philox_random.h" + +#include <math.h> +#include <algorithm> +#include <functional> +#include <unordered_map> +#include <vector> + +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/lib/random/philox_random_test_utils.h" +#include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/lib/random/random_distributions.h" +#include <gtest/gtest.h> + +namespace tensorflow { +namespace random { +namespace { + +// A trivial distribution that just returns the PhiloxRandom as a distribution +class TrivialPhiloxDistribution { + public: + // The number of elements that will be returned. + static constexpr int kResultElementCount = PhiloxRandom::kResultElementCount; + typedef PhiloxRandom::ResultType ResultType; + typedef PhiloxRandom::ResultElementType ResultElementType; + + PHILOX_DEVICE_INLINE + ResultType operator()(PhiloxRandom* gen) { return (*gen)(); } +}; + +// This test checks that skipping certain number of samples, is equivalent to +// generate the same number of samples without skipping. +TEST(PhiloxRandomTest, SkipMatchTest) { + constexpr int count = 1024; + constexpr int skip_count = 2048; + + uint64 test_seed = GetTestSeed(); + std::vector<uint32> v1(count); + { + PhiloxRandom gen(test_seed); + gen.Skip(skip_count / 4); + FillRandoms<TrivialPhiloxDistribution>(gen, &v1[0], v1.size()); + } + + std::vector<uint32> v2(count + skip_count); + { + PhiloxRandom gen(test_seed); + FillRandoms<TrivialPhiloxDistribution>(gen, &v2[0], v2.size()); + } + + for (int i = 0; i < count; ++i) { + ASSERT_EQ(v1[i], v2[i + skip_count]); + } +} + +} // namespace +} // namespace random +} // namespace tensorflow diff --git a/tensorflow/core/lib/random/philox_random_test_utils.h b/tensorflow/core/lib/random/philox_random_test_utils.h new file mode 100644 index 0000000000..d22f6b36e4 --- /dev/null +++ b/tensorflow/core/lib/random/philox_random_test_utils.h @@ -0,0 +1,36 @@ +#ifndef TENSORFLOW_LIB_RANDOM_PHILOX_RANDOM_TEST_UTILS_H_ +#define TENSORFLOW_LIB_RANDOM_PHILOX_RANDOM_TEST_UTILS_H_ + +#include <algorithm> + +#include "tensorflow/core/lib/random/philox_random.h" +#include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { +namespace random { + +// Return a random seed. +inline uint64 GetTestSeed() { return New64(); } + +// A utility function to fill the given array with samples from the given +// distribution. +template <class Distribution> +void FillRandoms(PhiloxRandom gen, typename Distribution::ResultElementType* p, + int64 size) { + const int granularity = Distribution::kResultElementCount; + + CHECK(size % granularity == 0) << " size: " << size + << " granularity: " << granularity; + + Distribution dist; + for (int i = 0; i < size; i += granularity) { + const auto sample = dist(&gen); + std::copy(&sample[0], &sample[0] + granularity, &p[i]); + } +} + +} // namespace random +} // namespace tensorflow + +#endif // TENSORFLOW_LIB_RANDOM_PHILOX_RANDOM_TEST_UTILS_H_ diff --git a/tensorflow/core/lib/random/random.cc b/tensorflow/core/lib/random/random.cc new file mode 100644 index 0000000000..2959b05382 --- /dev/null +++ b/tensorflow/core/lib/random/random.cc @@ -0,0 +1,22 @@ +#include "tensorflow/core/lib/random/random.h" + +#include <random> +#include "tensorflow/core/platform/port.h" + +namespace tensorflow { +namespace random { + +std::mt19937_64* InitRng() { + std::random_device device("/dev/random"); + return new std::mt19937_64(device()); +} + +uint64 New64() { + static std::mt19937_64* rng = InitRng(); + static mutex mu; + mutex_lock l(mu); + return (*rng)(); +} + +} // namespace random +} // namespace tensorflow diff --git a/tensorflow/core/lib/random/random.h b/tensorflow/core/lib/random/random.h new file mode 100644 index 0000000000..1a20436c4e --- /dev/null +++ b/tensorflow/core/lib/random/random.h @@ -0,0 +1,16 @@ +#ifndef TENSORFLOW_LIB_RANDOM_RANDOM_H_ +#define TENSORFLOW_LIB_RANDOM_RANDOM_H_ + +#include "tensorflow/core/platform/port.h" + +namespace tensorflow { +namespace random { + +// Return a 64-bit random value. Different sequences are generated +// in different processes. +uint64 New64(); + +} // namespace random +} // namespace tensorflow + +#endif // TENSORFLOW_LIB_RANDOM_RANDOM_H_ diff --git a/tensorflow/core/lib/random/random_distributions.h b/tensorflow/core/lib/random/random_distributions.h new file mode 100644 index 0000000000..caafcde513 --- /dev/null +++ b/tensorflow/core/lib/random/random_distributions.h @@ -0,0 +1,361 @@ +#ifndef TENSORFLOW_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_ +#define TENSORFLOW_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_ + +#include <math.h> +#include <string.h> +#include <algorithm> + +#include "tensorflow/core/lib/random/philox_random.h" + +namespace tensorflow { +namespace random { + +// Helper function to convert a 32-bit integer to a float between [0..1). +PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x); +// Helper function to convert two 32-bit integers to a double between [0..1). +PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1); + +// A class that generates uniform distribution random numbers from the +// underlying random integer generator. +// Arguments: +// Generator: a generator type that returns a number of uint32 upon each +// each invocation. It needs to define kResultElementCount for the +// sample count for each invocation, and ResultType for actual +// returned sample type. +// RealType: the data type of the real numberes that will be returned by the +// distribution. This could be either float or double for now. +// This class is meant to be implemented through specialization. The default +// is not defined by design. +template <class Generator, typename RealType> +class UniformDistribution; + +template <class Generator> +class UniformDistribution<Generator, float> { + public: + // The number of elements that will be returned. + static const int kResultElementCount = Generator::kResultElementCount; + // Indicate that this distribution may take variable number of samples + // during the runtime. + static const bool kVariableSamplesPerOutput = false; + typedef Array<float, kResultElementCount> ResultType; + typedef float ResultElementType; + + PHILOX_DEVICE_INLINE + ResultType operator()(Generator* gen) { + typename Generator::ResultType sample = (*gen)(); + ResultType result; + for (int i = 0; i < kResultElementCount; ++i) { + result[i] = Uint32ToFloat(sample[i]); + } + return result; + } +}; + +template <class Generator> +class UniformDistribution<Generator, double> { + public: + // The number of elements that will be returned. + static const int kResultElementCount = Generator::kResultElementCount / 2; + // Indicate that this distribution may take variable number of samples + // during the runtime. + static const bool kVariableSamplesPerOutput = false; + typedef Array<double, kResultElementCount> ResultType; + typedef double ResultElementType; + + PHILOX_DEVICE_INLINE + ResultType operator()(Generator* gen) { + typename Generator::ResultType sample = (*gen)(); + ResultType result; + for (int i = 0; i < kResultElementCount; ++i) { + result[i] = Uint64ToDouble(sample[2 * i], sample[2 * i + 1]); + } + return result; + } +}; + +// A class that adapts the underlying native multiple samples to return a single +// sample at a time. +template <class Generator> +class SingleSampleAdapter { + public: + // The number of elements that will be returned. + static const int kResultElementCount = 1; + // The number of elements that will be returned by the underlying generator. + static const int kNativeElementCount = Generator::kResultElementCount; + typedef typename Generator::ResultElementType ResultType; + typedef typename Generator::ResultElementType ResultElementType; + + PHILOX_DEVICE_INLINE + explicit SingleSampleAdapter(Generator* gen) + : generator_(gen), used_result_index_(Generator::kResultElementCount) {} + + PHILOX_DEVICE_INLINE + ResultType operator()() { + if (used_result_index_ == Generator::kResultElementCount) { + unused_results_ = (*generator_)(); + used_result_index_ = 0; + } + + return unused_results_[used_result_index_++]; + } + + private: + Generator* generator_; + typename Generator::ResultType unused_results_; + int used_result_index_; +}; + +// A class that generates unit normal distribution random numbers from the +// underlying random integer generator. +// Arguments: +// Generator: a generator type that returns a number of uint32 upon each +// each invocation. It needs to define kResultElementCount for the +// sample count for each invocation, and ResultType for actual +// returned sample type. +// RealType: the data type of the real numberes that will be returned by the +// distribution. This could be either float or double for now. +// This class is meant to be implemented through specialization. The default +// is not defined by design. +template <class Generator, typename RealType> +class NormalDistribution; + +PHILOX_DEVICE_INLINE +void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1); + +PHILOX_DEVICE_INLINE +void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0, + double* d1); + +template <class Generator> +class NormalDistribution<Generator, float> { + public: + // The number of elements that will be returned. + static const int kResultElementCount = Generator::kResultElementCount; + // Indicate that this distribution may take variable number of samples + // during the runtime. + static const bool kVariableSamplesPerOutput = false; + typedef Array<float, kResultElementCount> ResultType; + typedef float ResultElementType; + + PHILOX_DEVICE_INLINE + ResultType operator()(Generator* gen) { + typename Generator::ResultType sample = (*gen)(); + ResultType result; + for (int i = 0; i < kResultElementCount; i += 2) { + BoxMullerFloat(sample[i], sample[i + 1], &result[i], &result[i + 1]); + } + return result; + } +}; + +template <class Generator> +class NormalDistribution<Generator, double> { + public: + // The number of elements that will be returned. + static const int kResultElementCount = Generator::kResultElementCount / 2; + // Indicate that this distribution may take variable number of samples + // during the runtime. + static const bool kVariableSamplesPerOutput = false; + typedef Array<double, kResultElementCount> ResultType; + typedef double ResultElementType; + + PHILOX_DEVICE_INLINE + ResultType operator()(Generator* gen) { + typename Generator::ResultType sample = (*gen)(); + ResultType result; + for (int i = 0; i < kResultElementCount; i += 2) { + const int i2 = 2 * i; + BoxMullerDouble(sample[i2], sample[i2 + 1], sample[i2 + 2], + sample[i2 + 3], &result[i], &result[i + 1]); + } + return result; + } +}; + +// A class that returns standard normal distribution between +// [-kTruncateValue, kTruncateValue]. +// Arguments: +// Generator: a generator type that returns a number of uint32 upon each +// each invocation. It needs to define kResultElementCount for the +// sample count for each invocation, and ResultType for actual +// returned sample type. +// RealType: the data type of the real numberes that will be returned by the +// distribution. This could be either float or double for now. +// This class is meant to be implemented through specialization. The default +// is not defined by design. +template <class SingleSampleGenerator, typename RealType> +class TruncatedNormalDistribution; + +// Partial specialization for float. +template <class SingleSampleGenerator> +class TruncatedNormalDistribution<SingleSampleGenerator, float> { + public: + // The number of elements that will be returned. + static const int kResultElementCount = + SingleSampleGenerator::kNativeElementCount; + // Indicate that this distribution may take variable number of samples + // during the runtime. + static const bool kVariableSamplesPerOutput = true; + // The threshold where the normal distribution is truncated. + const float kTruncateValue = 2.0f; + + typedef Array<float, kResultElementCount> ResultType; + typedef float ResultElementType; + + PHILOX_DEVICE_INLINE + ResultType operator()(SingleSampleGenerator* gen) { + ResultType results; + int index = 0; + while (true) { + // Repeatedly take samples from the normal distribution, until we have + // the desired number of elements that fall within the pre-defined cutoff + // threshold. + const uint32 x0 = (*gen)(); + const uint32 x1 = (*gen)(); + float f[2]; + BoxMullerFloat(x0, x1, &f[0], &f[1]); + + for (int i = 0; i < 2; ++i) { + if (fabs(f[i]) < kTruncateValue) { + results[index++] = f[i]; + if (index >= kResultElementCount) { + return results; + } + } + } + } + } +}; + +// Partial specialization for double. +template <class SingleSampleGenerator> +class TruncatedNormalDistribution<SingleSampleGenerator, double> { + public: + // The number of elements that will be returned. + static const int kResultElementCount = + (SingleSampleGenerator::kNativeElementCount > 1) + ? SingleSampleGenerator::kNativeElementCount / 2 + : 1; + // Indicate that this distribution may take variable number of samples + // during the runtime. + static const bool kVariableSamplesPerOutput = true; + typedef Array<double, kResultElementCount> ResultType; + typedef double ResultElementType; + const double kTruncateValue = 2.0; + + PHILOX_DEVICE_INLINE + ResultType operator()(SingleSampleGenerator* gen) { + ResultType results; + int index = 0; + while (1) { + const uint32 x0 = (*gen)(); + const uint32 x1 = (*gen)(); + const uint32 x2 = (*gen)(); + const uint32 x3 = (*gen)(); + double d[2]; + BoxMullerDouble(x0, x1, x2, x3, &d[0], &d[1]); + + for (int i = 0; i < 2; ++i) { + if (fabs(d[i]) < kTruncateValue) { + results[index++] = d[i]; + if (index >= kResultElementCount) { + return results; + } + } + } + } + } +}; + +// Helper function to convert two 32-bit uniform integers to two floats +// under the unit normal distribution. +PHILOX_DEVICE_INLINE +void BoxMullerFloat(uint32 x0, uint32 x1, float* f0, float* f1) { + // This function implements the Box-Muller transform: + // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form + // Do not send a really small number to log(). + // We cannot mark "epsilon" as "static const" because NVCC would complain + const float epsilon = 1.0e-7f; + float u1 = Uint32ToFloat(x0); + if (u1 < epsilon) { + u1 = epsilon; + } + const float v1 = 2.0f * M_PI * Uint32ToFloat(x1); + const float u2 = sqrt(-2.0f * log(u1)); +#if defined(__linux) + sincosf(v1, f0, f1); +#else + *f0 = sinf(v1); + *f1 = cosf(v1); +#endif + *f0 *= u2; + *f1 *= u2; +} + +// Helper function to convert four 32-bit uniform integers to two doubles +// under the unit normal distribution. +PHILOX_DEVICE_INLINE +void BoxMullerDouble(uint32 x0, uint32 x1, uint32 x2, uint32 x3, double* d0, + double* d1) { + // This function implements the Box-Muller transform: + // http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform#Basic_form + // Do not send a really small number to log(). + // We cannot mark "epsilon" as "static const" because NVCC would complain + const double epsilon = 1.0e-7; + double u1 = Uint64ToDouble(x0, x1); + if (u1 < epsilon) { + u1 = epsilon; + } + const double v1 = 2 * M_PI * Uint64ToDouble(x2, x3); + const double u2 = sqrt(-2.0 * log(u1)); +#if defined(__linux) + sincos(v1, d0, d1); +#else + *d0 = sin(v1); + *d1 = cos(v1); +#endif + *d0 *= u2; + *d1 *= u2; +} + +// Helper function to convert an 32-bit integer to a float between [0..1). +PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x) { + // IEEE754 floats are formatted as follows (MSB first): + // sign(1) exponent(8) mantissa(23) + // Conceptually construct the following: + // sign == 0 + // exponent == 127 -- an excess 127 representation of a zero exponent + // mantissa == 23 random bits + const uint32 man = x & 0x7fffffu; // 23 bit mantissa + const uint32 exp = static_cast<uint32>(127); + const uint32 val = (exp << 23) | man; + + // Assumes that endian-ness is same for float and uint32. + float result; + memcpy(&result, &val, sizeof(val)); + return result - 1.0f; +} + +// Helper function to convert two 32-bit integers to a double between [0..1). +PHILOX_DEVICE_INLINE double Uint64ToDouble(uint32 x0, uint32 x1) { + // IEEE754 doubles are formatted as follows (MSB first): + // sign(1) exponent(11) mantissa(52) + // Conceptually construct the following: + // sign == 0 + // exponent == 1023 -- an excess 1023 representation of a zero exponent + // mantissa == 52 random bits + const uint32 mhi = x0 & 0xfffffu; // upper 20 bits of mantissa + const uint32 mlo = x1; // lower 32 bits of mantissa + const uint64 man = (static_cast<uint64>(mhi) << 32) | mlo; // mantissa + const uint64 exp = static_cast<uint64>(1023); + const uint64 val = (exp << 52) | man; + // Assumes that endian-ness is same for double and uint64. + double result; + memcpy(&result, &val, sizeof(val)); + return result - 1.0; +} + +} // namespace random +} // namespace tensorflow + +#endif // TENSORFLOW_LIB_RANDOM_RANDOM_DISTRIBUTIONS_H_ diff --git a/tensorflow/core/lib/random/random_distributions_test.cc b/tensorflow/core/lib/random/random_distributions_test.cc new file mode 100644 index 0000000000..3ce86a907a --- /dev/null +++ b/tensorflow/core/lib/random/random_distributions_test.cc @@ -0,0 +1,270 @@ +#include "tensorflow/core/lib/random/random_distributions.h" + +#include <math.h> +#include <algorithm> +#include <functional> +#include <unordered_map> +#include <vector> + +#include "tensorflow/core/lib/random/philox_random.h" +#include "tensorflow/core/lib/random/philox_random_test_utils.h" +#include "tensorflow/core/lib/random/random.h" +#include "tensorflow/core/platform/logging.h" +#include <gtest/gtest.h> + +namespace tensorflow { +namespace random { +namespace { + +// The largest z-value we want to tolerate. Since the z-test approximates a +// unit normal distribution, it should almost definitely never exceed 6. +static constexpr float kZLimit = 6.0; + +// A utility function to fill the given array with samples from the given +// distribution, using the single adatper of the underlying generator +template <class Distribution> +void FillRandomsWithSingles(PhiloxRandom gen, + typename Distribution::ResultElementType* p, + int64 size) { + int granularity = Distribution::kResultElementCount; + + CHECK(size % granularity == 0) << " size: " << size + << " granularity: " << granularity; + + SingleSampleAdapter<PhiloxRandom> single_samples(&gen); + + Distribution dist; + for (int i = 0; i < size; i += granularity) { + auto sample = dist(&single_samples); + std::copy(&sample[0], &sample[0] + granularity, &p[i]); + } +} + +// Check the given array of samples matches the given theoretical moment +// function at different orders. The test is considered passing if the z-tests +// of all statistical moments are all below z_limit. +// typename T in the template argument could be either float or double. +// Arguments: +// samples: an array of samples to be tested for their statistical properties; +// theoretical_moments: a functor that can calculate arbitrary order of +// of the given distribution; +// max_moments: the largest moments of the uniform distribution to be tested; +// stride: the distance between samples to check for statistical properties +// 0 means the n-th moment of each sample +// any other strides tests for spatial correlation between samples; +// z_limit: the maximum z-test we would consider the test to pass; +template <typename T> +bool CheckSamplesMoments(const std::vector<T>& samples, + std::function<double(int)> theoretical_moments, + int max_moments, int stride, T z_limit) { + const T* const samples_data = &samples[0]; + const int samples_size = samples.size(); + std::vector<double> moments(max_moments + 1); + double* const moments_data = &moments[0]; + std::vector<int> moments_sample_count(max_moments + 1); + int* const moments_sample_count_data = &moments_sample_count[0]; + + for (int k = 0; k < samples_size; ++k) { + double moment = 1.; + for (int i = 0; i <= max_moments; ++i) { + int index = k + i * stride; + if (index >= samples_size) { + break; + } + // moments[i] store the i-th order measured moments. + // bypass std::vector::opeartor[] because they are too slow in the debug + // mode, given the large number of samples. + moments_data[i] += moment; + ++moments_sample_count_data[i]; + moment *= samples_data[index]; + } + } + + // normalize the moments + for (int i = 0; i <= max_moments; ++i) { + moments[i] /= moments_sample_count[i]; + } + + bool status = true; + + for (int i = 1; i <= max_moments; ++i) { + // Calculate the theoretical mean and variance + const double moments_i_mean = (stride == 0) + ? theoretical_moments(i) + : std::pow(theoretical_moments(1), i); + const double moments_i_squared = (stride == 0) + ? theoretical_moments(2 * i) + : std::pow(theoretical_moments(2), i); + const double moments_i_var = + moments_i_squared - moments_i_mean * moments_i_mean; + + // assume every operation has a small numerical error. + static const double kNumericalError = 1e-6; + // it takes i multiplications to calculate one i-th moment. + const double error_per_moment = i * kNumericalError; + const double total_variance = + moments_i_var / moments_sample_count[i] + error_per_moment; + // z_test is approximately a unit normal distribution. + const double z_test = + fabs((moments[i] - moments_i_mean) / sqrt(total_variance)); + + if (z_test > z_limit) { + LOG(ERROR) << "failing z_test:" + << " moment: " << i << " stride: " << stride + << " z_test: " << z_test << " z_limit: " << z_limit + << " measured moments: " << moments[i] + << " theoretical mean of the moments: " << moments_i_mean + << " theoretical var of the moments: " << moments_i_var + << " sample count: " << moments_sample_count[i]; + status = false; + } + } + + return status; +} + +// This tests checks that the generated samples match the theoretical moments +// of the uniform distribution. +template <typename T> +void UniformMomentsTest(int count, int max_moments, + const std::vector<int>& strides, T z_limit) { + auto uniform_moments = [](int n) -> double { return 1. / (n + 1); }; + + std::vector<T> v1(count); + uint64 seed = GetTestSeed(); + PhiloxRandom gen(seed); + FillRandoms<UniformDistribution<PhiloxRandom, T> >(gen, &v1[0], v1.size()); + for (int stride : strides) { + bool status = CheckSamplesMoments<T>(v1, uniform_moments, max_moments, + stride, z_limit); + ASSERT_TRUE(status) << " UniformMomentsTest failing. seed: " << seed; + } +} + +// This test checks that the generated samples match the theoretical moments +// of the unit normal distribution. +template <typename T> +void NormalMomentsTest(int count, int max_moments, + const std::vector<int>& strides, T z_limit) { + auto normal_moments = [](int n) -> double { + if (n % 2 == 1) { + // For an odd order, the moment of a unit normal distribution is zero. + return 0.; + } else { + // For an even order, the moment of a unit normal distribution is. + // (n-1)!! + double v = 1.; + for (int i = n - 1; i >= 1; i -= 2) { + v *= i; + } + return v; + } + }; + + std::vector<T> v1(count); + uint64 seed = GetTestSeed(); + PhiloxRandom gen(seed); + FillRandoms<NormalDistribution<PhiloxRandom, T> >(gen, &v1[0], v1.size()); + + for (int stride : strides) { + bool status = CheckSamplesMoments<T>(v1, normal_moments, max_moments, + stride, z_limit); + ASSERT_TRUE(status) << " NormalMomentsTest failing. seed: " << seed; + } +} + +// A functor to calculate the moments for the truncated normal distribution. +// For any odd order, the moment is zero. But for any other n, it can be proven +// that the following recursive relationship for the moments of the truncated +// standard normal: +// m(n) = (n - 1) * m(n - 2) - 2 * v ^ (n - 1) * f(v) / (2 * Phi(v) - 1) +// where v is the cut-off value, f(v) is the p.d.f of the standard +// normal, and Phi(v) is the c.d.f of the standard normal. +class TruncatedNormalMoments { + public: + double operator()(int n) { + if (n == 0) { + return 1; + } + if (n % 2 == 1) { + // For an odd order, the moment is always zero + return 0.; + } + + // Memoization and check the cached results. + auto iter = cached_results_.find(n); + if (iter != cached_results_.end()) { + return iter->second; + } + + // The real computation of the moment. + double bias = 2.0 * std::pow(kV, n - 1) * kFV / (2.0 * kPhiV - 1.0); + double moment_n_minus_2 = (*this)(n - 2); + double moment_n = (n - 1) * moment_n_minus_2 - bias; + + cached_results_[n] = moment_n; + return moment_n; + } + + private: + const double kV = 2.0; + // f(v), where f is the p.d.f of the normal distribution and v=2. + const double kFV = 1.0 / sqrt(2.0 * M_PI) * exp(-kV * kV / 2.0); + // The numerical evaluation of Phi(v), where v is the truncate value. + // v = 2 in the current implementation. + const double kPhiV = 0.977249868051821; + std::unordered_map<int, double> cached_results_; +}; + +// This test checks that the generated samples matche the theoretical moments +// of the truncated normal distribution. +template <typename T> +void RandomParametersMomentsTest(int count, int max_moments, + const std::vector<int>& strides, T z_limit) { + std::vector<T> v1(count); + uint64 seed = GetTestSeed(); + PhiloxRandom gen(seed); + FillRandomsWithSingles< + TruncatedNormalDistribution<SingleSampleAdapter<PhiloxRandom>, T> >( + gen, &v1[0], v1.size()); + + for (int stride : strides) { + bool status = CheckSamplesMoments<T>(v1, TruncatedNormalMoments(), + max_moments, stride, z_limit); + ASSERT_TRUE(status) << " NormalMomentsTest failing. seed: " << seed; + } +} + +TEST(PhiloxRandomTest, UniformFloatMomentsTest) { + const std::vector<int> strides = {0, 1, 4, 17}; + UniformMomentsTest<float>(1 << 20, 40, strides, kZLimit); +} + +TEST(PhiloxRandomTest, NormalFloatMomentsTest) { + const std::vector<int> strides = {0, 1, 4, 17}; + NormalMomentsTest<float>(8 << 20, 25, strides, kZLimit); +} + +TEST(PhiloxRandomTest, RandomParametersFloatMomentsTest) { + const std::vector<int> strides = {0, 1, 4, 17}; + RandomParametersMomentsTest<float>(1 << 20, 40, strides, kZLimit); +} + +TEST(PhiloxRandomTest, UniformDoubleMomentsTest) { + const std::vector<int> strides = {0, 1, 4, 17}; + UniformMomentsTest<double>(1 << 20, 40, strides, kZLimit); +} + +TEST(PhiloxRandomTest, NormalDoubleMomentsTest) { + const std::vector<int> strides = {0, 1, 4, 17}; + NormalMomentsTest<double>(8 << 20, 25, strides, kZLimit); +} + +TEST(PhiloxRandomTest, RandomParametersDoubleMomentsTest) { + const std::vector<int> strides = {0, 1, 4, 17}; + RandomParametersMomentsTest<double>(1 << 20, 40, strides, kZLimit); +} + +} // namespace +} // namespace random +} // namespace tensorflow diff --git a/tensorflow/core/lib/random/random_test.cc b/tensorflow/core/lib/random/random_test.cc new file mode 100644 index 0000000000..7ed37c8b5e --- /dev/null +++ b/tensorflow/core/lib/random/random_test.cc @@ -0,0 +1,21 @@ +#include "tensorflow/core/lib/random/random.h" + +#include <set> +#include "tensorflow/core/platform/port.h" +#include <gtest/gtest.h> + +namespace tensorflow { +namespace random { +namespace { + +TEST(New64Test, SanityCheck) { + std::set<uint64> values; + for (int i = 0; i < 1000000; i++) { + uint64 x = New64(); + EXPECT_TRUE(values.insert(x).second) << "duplicate " << x; + } +} + +} // namespace +} // namespace random +} // namespace tensorflow diff --git a/tensorflow/core/lib/random/simple_philox.cc b/tensorflow/core/lib/random/simple_philox.cc new file mode 100644 index 0000000000..1035e1f017 --- /dev/null +++ b/tensorflow/core/lib/random/simple_philox.cc @@ -0,0 +1,24 @@ +#include "tensorflow/core/lib/random/simple_philox.h" +#include "tensorflow/core/lib/random/exact_uniform_int.h" +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { +namespace random { + +uint32 SimplePhilox::Uniform(uint32 n) { + return ExactUniformInt<uint32>(n, [this]() { return Rand32(); }); +} + +uint64 SimplePhilox::Uniform64(uint64 n) { + return ExactUniformInt<uint64>(n, [this]() { return Rand64(); }); +} + +uint32 SimplePhilox::Skewed(int max_log) { + CHECK(0 <= max_log && max_log <= 32); + const int shift = Rand32() % (max_log + 1); + const uint32 mask = shift == 32 ? ~static_cast<uint32>(0) : (1 << shift) - 1; + return Rand32() & mask; +} + +} // namespace random +} // namespace tensorflow diff --git a/tensorflow/core/lib/random/simple_philox.h b/tensorflow/core/lib/random/simple_philox.h new file mode 100644 index 0000000000..12b15d7616 --- /dev/null +++ b/tensorflow/core/lib/random/simple_philox.h @@ -0,0 +1,61 @@ +#ifndef TENSORFLOW_LIB_RANDOM_SIMPLE_PHILOX_H_ +#define TENSORFLOW_LIB_RANDOM_SIMPLE_PHILOX_H_ + +#include <math.h> +#include <string.h> +#include <algorithm> + +#include "tensorflow/core/lib/random/philox_random.h" +#include "tensorflow/core/lib/random/random_distributions.h" + +namespace tensorflow { +namespace random { + +// A simple imperative interface to Philox +class SimplePhilox { + public: + PHILOX_DEVICE_INLINE + explicit SimplePhilox(PhiloxRandom* gen) : single_(gen) {} + + // 32 random bits + PHILOX_DEVICE_INLINE uint32 Rand32() { return single_(); } + + // 64 random bits + PHILOX_DEVICE_INLINE uint64 Rand64() { + const uint32 lo = single_(), hi = single_(); + return lo | static_cast<uint64>(hi) << 32; + } + + // Uniform float in [0, 1) + PHILOX_DEVICE_INLINE float RandFloat() { return Uint32ToFloat(single_()); } + + // Uniform double in [0, 1) + PHILOX_DEVICE_INLINE double RandDouble() { + const uint32 x0 = single_(), x1 = single_(); + return Uint64ToDouble(x0, x1); + } + + // Uniform integer in [0, n). + // Uses rejection sampling, so may need more than one 32-bit sample. + uint32 Uniform(uint32 n); + + // Approximately uniform integer in [0, n). + // Uses rejection sampling, so may need more than one 64-bit sample. + uint64 Uniform64(uint64 n); + + // True with probability 1/n. + bool OneIn(uint32 n) { return Uniform(n) == 0; } + + // Skewed: pick "base" uniformly from range [0,max_log] and then + // return "base" random bits. The effect is to pick a number in the + // range [0,2^max_log-1] with bias towards smaller numbers. + uint32 Skewed(int max_log); + + private: + SingleSampleAdapter<PhiloxRandom> single_; +}; + +} // namespace random +} // namespace tensorflow + +#endif // TENSORFLOW_LIB_RANDOM_SIMPLE_PHILOX_H_ diff --git a/tensorflow/core/lib/random/simple_philox_test.cc b/tensorflow/core/lib/random/simple_philox_test.cc new file mode 100644 index 0000000000..4246b8b4dd --- /dev/null +++ b/tensorflow/core/lib/random/simple_philox_test.cc @@ -0,0 +1,120 @@ +#include "tensorflow/core/lib/random/simple_philox.h" + +#include <set> +#include <string> + +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/port.h" +#include <gtest/gtest.h> + +namespace tensorflow { +namespace random { +namespace { + +TEST(SimplePhiloxTest, FloatTest) { + PhiloxRandom philox(7, 7); + SimplePhilox gen(&philox); + static const int kIters = 1000000; + for (int i = 0; i < kIters; ++i) { + float f = gen.RandFloat(); + EXPECT_LE(0.0f, f); + EXPECT_GT(1.0f, f); + } + for (int i = 0; i < kIters; ++i) { + double d = gen.RandDouble(); + EXPECT_LE(0.0, d); + EXPECT_GT(1.0, d); + } +} + +static void DifferenceTest(const char *names, SimplePhilox *gen1, + SimplePhilox *gen2) { + static const int kIters = 100; + bool different = false; + for (int i = 0; i < kIters; ++i) { + if (gen1->Rand32() != gen2->Rand32()) { + different = true; + break; + } + } + CHECK(different) << "different seeds but same output!"; +} + +TEST(SimplePhiloxTest, DifferenceTest) { + PhiloxRandom philox1(1, 1), philox2(17, 17); + SimplePhilox gen1(&philox1), gen2(&philox2); + + DifferenceTest("SimplePhilox: different seeds", &gen1, &gen2); +} + +TEST(SimplePhiloxTest, DifferenceTestCloseSeeds) { + PhiloxRandom philox1(1, 1), philox2(2, 1); + SimplePhilox gen1(&philox1), gen2(&philox2); + + DifferenceTest("SimplePhilox: close seeds", &gen1, &gen2); +} + +TEST(SimplePhiloxTest, Regression_CloseSeedsAreDifferent) { + const int kCount = 1000; + + // Two seeds differ only by the last bit. + PhiloxRandom philox1(0, 1), philox2(1, 1); + SimplePhilox gen1(&philox1), gen2(&philox2); + + std::set<uint32> first; + std::set<uint32> all; + for (int i = 0; i < kCount; ++i) { + uint32 v = gen1.Rand32(); + first.insert(v); + all.insert(v); + all.insert(gen2.Rand32()); + } + + // Broken array initialization implementation (before 2009-08-18) using the + // above seeds return <1000, 1007>, generating output that is >99% similar. + // The fix returns <1000, 2000> for completely disjoint sets. + EXPECT_EQ(kCount, first.size()); + EXPECT_EQ(2 * kCount, all.size()); +} + +TEST(SimplePhiloxTest, TestUniform) { + PhiloxRandom philox(17, 17); + SimplePhilox gen(&philox); + + uint32 range = 3 * (1L << 29); + uint32 threshold = 1L << 30; + + size_t count = 0; + static const int kTrials = 100000; + for (int i = 0; i < kTrials; ++i) { + uint32 rnd = gen.Uniform(range); + if (rnd < threshold) { + ++count; + } + } + + EXPECT_LT(fabs((threshold + 0.0) / range - (count + 0.0) / kTrials), 0.005); +} + +TEST(SimplePhiloxTest, TestUniform64) { + PhiloxRandom philox(17, 17); + SimplePhilox gen(&philox); + + uint64 range = 3 * (1LL << 59); + uint64 threshold = 1LL << 60; + + size_t count = 0; + static const int kTrials = 100000; + for (int i = 0; i < kTrials; ++i) { + uint64 rnd = gen.Uniform64(range); + if (rnd < threshold) { + ++count; + } + } + + EXPECT_LT(fabs((threshold + 0.0) / range - (count + 0.0) / kTrials), 0.005); +} + +} // namespace +} // namespace random +} // namespace tensorflow diff --git a/tensorflow/core/lib/random/weighted_picker.cc b/tensorflow/core/lib/random/weighted_picker.cc new file mode 100644 index 0000000000..f96da578ec --- /dev/null +++ b/tensorflow/core/lib/random/weighted_picker.cc @@ -0,0 +1,203 @@ +#include "tensorflow/core/lib/random/weighted_picker.h" + +#include <string.h> +#include <algorithm> + +#include "tensorflow/core/lib/random/simple_philox.h" + +namespace tensorflow { +namespace random { + +WeightedPicker::WeightedPicker(int N) { + CHECK_GE(N, 0); + N_ = N; + + // Find the number of levels + num_levels_ = 1; + while (LevelSize(num_levels_ - 1) < N) { + num_levels_++; + } + + // Initialize the levels + level_ = new int32*[num_levels_]; + for (int l = 0; l < num_levels_; l++) { + level_[l] = new int32[LevelSize(l)]; + } + + SetAllWeights(1); +} + +WeightedPicker::~WeightedPicker() { + for (int l = 0; l < num_levels_; l++) { + delete[] level_[l]; + } + delete[] level_; +} + +static int32 UnbiasedUniform(SimplePhilox* r, int32 n) { + CHECK_LE(0, n); + const uint32 range = ~static_cast<uint32>(0); + if (n == 0) { + return r->Rand32() * n; + } else if (0 == (n & (n - 1))) { + // N is a power of two, so just mask off the lower bits. + return r->Rand32() & (n - 1); + } else { + // Reject all numbers that skew the distribution towards 0. + + // Rand32's output is uniform in the half-open interval [0, 2^{32}). + // For any interval [m,n), the number of elements in it is n-m. + + uint32 rem = (range % n) + 1; + uint32 rnd; + + // rem = ((2^{32}-1) \bmod n) + 1 + // 1 <= rem <= n + + // NB: rem == n is impossible, since n is not a power of 2 (from + // earlier check). + + do { + rnd = r->Rand32(); // rnd uniform over [0, 2^{32}) + } while (rnd < rem); // reject [0, rem) + // rnd is uniform over [rem, 2^{32}) + // + // The number of elements in the half-open interval is + // + // 2^{32} - rem = 2^{32} - ((2^{32}-1) \bmod n) - 1 + // = 2^{32}-1 - ((2^{32}-1) \bmod n) + // = n \cdot \lfloor (2^{32}-1)/n \rfloor + // + // therefore n evenly divides the number of integers in the + // interval. + // + // The function v \rightarrow v % n takes values from [bias, + // 2^{32}) to [0, n). Each integer in the range interval [0, n) + // will have exactly \lfloor (2^{32}-1)/n \rfloor preimages from + // the domain interval. + // + // Therefore, v % n is uniform over [0, n). QED. + + return rnd % n; + } +} + +int WeightedPicker::Pick(SimplePhilox* rnd) const { + if (total_weight() == 0) return -1; + + // using unbiased uniform distribution to avoid bias + // toward low elements resulting from a possible use + // of big weights. + return PickAt(UnbiasedUniform(rnd, total_weight())); +} + +int WeightedPicker::PickAt(int32 weight_index) const { + if (weight_index < 0 || weight_index >= total_weight()) return -1; + + int32 position = weight_index; + int index = 0; + + for (int l = 1; l < num_levels_; l++) { + // Pick left or right child of "level_[l-1][index]" + const int32 left_weight = level_[l][2 * index]; + if (position < left_weight) { + // Descend to left child + index = 2 * index; + } else { + // Descend to right child + index = 2 * index + 1; + position -= left_weight; + } + } + CHECK_GE(index, 0); + CHECK_LT(index, N_); + CHECK_LE(position, level_[num_levels_ - 1][index]); + return index; +} + +void WeightedPicker::set_weight(int index, int32 weight) { + assert(index >= 0); + assert(index < N_); + + // Adjust the sums all the way up to the root + const int32 delta = weight - get_weight(index); + for (int l = num_levels_ - 1; l >= 0; l--) { + level_[l][index] += delta; + index >>= 1; + } +} + +void WeightedPicker::SetAllWeights(int32 weight) { + // Initialize leaves + int32* leaves = level_[num_levels_ - 1]; + for (int i = 0; i < N_; i++) leaves[i] = weight; + for (int i = N_; i < LevelSize(num_levels_ - 1); i++) leaves[i] = 0; + + // Now sum up towards the root + RebuildTreeWeights(); +} + +void WeightedPicker::SetWeightsFromArray(int N, const int32* weights) { + Resize(N); + + // Initialize leaves + int32* leaves = level_[num_levels_ - 1]; + for (int i = 0; i < N_; i++) leaves[i] = weights[i]; + for (int i = N_; i < LevelSize(num_levels_ - 1); i++) leaves[i] = 0; + + // Now sum up towards the root + RebuildTreeWeights(); +} + +void WeightedPicker::RebuildTreeWeights() { + for (int l = num_levels_ - 2; l >= 0; l--) { + int32* level = level_[l]; + int32* children = level_[l + 1]; + for (int i = 0; i < LevelSize(l); i++) { + level[i] = children[2 * i] + children[2 * i + 1]; + } + } +} + +void WeightedPicker::Append(int32 weight) { + Resize(num_elements() + 1); + set_weight(num_elements() - 1, weight); +} + +void WeightedPicker::Resize(int new_size) { + CHECK_GE(new_size, 0); + if (new_size <= LevelSize(num_levels_ - 1)) { + // The new picker fits in the existing levels. + + // First zero out any of the weights that are being dropped so + // that the levels are correct (only needed when shrinking) + for (int i = new_size; i < N_; i++) { + set_weight(i, 0); + } + + // We do not need to set any new weights when enlarging because + // the unneeded entries always have weight zero. + N_ = new_size; + return; + } + + // We follow the simple strategy of just copying the old + // WeightedPicker into a new WeightedPicker. The cost is + // O(N) regardless. + assert(new_size > N_); + WeightedPicker new_picker(new_size); + int32* dst = new_picker.level_[new_picker.num_levels_ - 1]; + int32* src = this->level_[this->num_levels_ - 1]; + memcpy(dst, src, sizeof(dst[0]) * N_); + memset(dst + N_, 0, sizeof(dst[0]) * (new_size - N_)); + new_picker.RebuildTreeWeights(); + + // Now swap the two pickers + std::swap(new_picker.N_, this->N_); + std::swap(new_picker.num_levels_, this->num_levels_); + std::swap(new_picker.level_, this->level_); + assert(this->N_ == new_size); +} + +} // namespace random +} // namespace tensorflow diff --git a/tensorflow/core/lib/random/weighted_picker.h b/tensorflow/core/lib/random/weighted_picker.h new file mode 100644 index 0000000000..3d2c2dbb39 --- /dev/null +++ b/tensorflow/core/lib/random/weighted_picker.h @@ -0,0 +1,118 @@ + +// An abstraction to pick from one of N elements with a specified +// weight per element. +// +// The weight for a given element can be changed in O(lg N) time +// An element can be picked in O(lg N) time. +// +// Uses O(N) bytes of memory. +// +// Alternative: distribution-sampler.h allows O(1) time picking, but no weight +// adjustment after construction. + +#ifndef TENSORFLOW_LIB_RANDOM_WEIGHTED_PICKER_H_ +#define TENSORFLOW_LIB_RANDOM_WEIGHTED_PICKER_H_ + +#include <assert.h> + +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/port.h" + +namespace tensorflow { +namespace random { + +class SimplePhilox; + +class WeightedPicker { + public: + // REQUIRES N >= 0 + // Initializes the elements with a weight of one per element + explicit WeightedPicker(int N); + + // Releases all resources + ~WeightedPicker(); + + // Pick a random element with probability proportional to its weight. + // If total weight is zero, returns -1. + int Pick(SimplePhilox* rnd) const; + + // Deterministically pick element x whose weight covers the + // specified weight_index. + // Returns -1 if weight_index is not in the range [ 0 .. total_weight()-1 ] + int PickAt(int32 weight_index) const; + + // Get the weight associated with an element + // REQUIRES 0 <= index < N + int32 get_weight(int index) const; + + // Set the weight associated with an element + // REQUIRES weight >= 0.0f + // REQUIRES 0 <= index < N + void set_weight(int index, int32 weight); + + // Get the total combined weight of all elements + int32 total_weight() const; + + // Get the number of elements in the picker + int num_elements() const; + + // Set weight of each element to "weight" + void SetAllWeights(int32 weight); + + // Resizes the picker to N and + // sets the weight of each element i to weight[i]. + // The sum of the weights should not exceed 2^31 - 2 + // Complexity O(N). + void SetWeightsFromArray(int N, const int32* weights); + + // REQUIRES N >= 0 + // + // Resize the weighted picker so that it has "N" elements. + // Any newly added entries have zero weight. + // + // Note: Resizing to a smaller size than num_elements() will + // not reclaim any memory. If you wish to reduce memory usage, + // allocate a new WeightedPicker of the appropriate size. + // + // It is efficient to use repeated calls to Resize(num_elements() + 1) + // to grow the picker to size X (takes total time O(X)). + void Resize(int N); + + // Grow the picker by one and set the weight of the new entry to "weight". + // + // Repeated calls to Append() in order to grow the + // picker to size X takes a total time of O(X lg(X)). + // Consider using SetWeightsFromArray instead. + void Append(int32 weight); + + private: + // We keep a binary tree with N leaves. The "i"th leaf contains + // the weight of the "i"th element. An internal node contains + // the sum of the weights of its children. + int N_; // Number of elements + int num_levels_; // Number of levels in tree (level-0 is root) + int32** level_; // Array that holds nodes per level + + // Size of each level + static int LevelSize(int level) { return 1 << level; } + + // Rebuild the tree weights using the leaf weights + void RebuildTreeWeights(); + + TF_DISALLOW_COPY_AND_ASSIGN(WeightedPicker); +}; + +inline int32 WeightedPicker::get_weight(int index) const { + DCHECK_GE(index, 0); + DCHECK_LT(index, N_); + return level_[num_levels_ - 1][index]; +} + +inline int32 WeightedPicker::total_weight() const { return level_[0][0]; } + +inline int WeightedPicker::num_elements() const { return N_; } + +} // namespace random +} // namespace tensorflow + +#endif // TENSORFLOW_LIB_RANDOM_WEIGHTED_PICKER_H_ diff --git a/tensorflow/core/lib/random/weighted_picker_test.cc b/tensorflow/core/lib/random/weighted_picker_test.cc new file mode 100644 index 0000000000..0b27d437d5 --- /dev/null +++ b/tensorflow/core/lib/random/weighted_picker_test.cc @@ -0,0 +1,254 @@ +#include "tensorflow/core/lib/random/weighted_picker.h" + +#include <string.h> +#include <vector> + +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/port.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" +#include "tensorflow/core/lib/random/simple_philox.h" +#include <gtest/gtest.h> + +namespace tensorflow { +namespace random { + +static void TestPicker(SimplePhilox* rnd, int size); +static void CheckUniform(SimplePhilox* rnd, WeightedPicker* picker, int trials); +static void CheckSkewed(SimplePhilox* rnd, WeightedPicker* picker, int trials); +static void TestPickAt(int items, const int32* weights); + +TEST(WeightedPicker, Simple) { + PhiloxRandom philox(testing::RandomSeed(), 17); + SimplePhilox rnd(&philox); + + { + VLOG(0) << "======= Zero-length picker"; + WeightedPicker picker(0); + EXPECT_EQ(picker.Pick(&rnd), -1); + } + + { + VLOG(0) << "======= Singleton picker"; + WeightedPicker picker(1); + EXPECT_EQ(picker.Pick(&rnd), 0); + EXPECT_EQ(picker.Pick(&rnd), 0); + EXPECT_EQ(picker.Pick(&rnd), 0); + } + + { + VLOG(0) << "======= Grown picker"; + WeightedPicker picker(0); + for (int i = 0; i < 10; i++) { + picker.Append(1); + } + CheckUniform(&rnd, &picker, 100000); + } + + { + VLOG(0) << "======= Grown picker with zero weights"; + WeightedPicker picker(1); + picker.Resize(10); + EXPECT_EQ(picker.Pick(&rnd), 0); + EXPECT_EQ(picker.Pick(&rnd), 0); + EXPECT_EQ(picker.Pick(&rnd), 0); + } + + { + VLOG(0) << "======= Shrink picker and check weights"; + WeightedPicker picker(1); + picker.Resize(10); + EXPECT_EQ(picker.Pick(&rnd), 0); + EXPECT_EQ(picker.Pick(&rnd), 0); + EXPECT_EQ(picker.Pick(&rnd), 0); + for (int i = 0; i < 10; i++) { + picker.set_weight(i, i); + } + EXPECT_EQ(picker.total_weight(), 45); + picker.Resize(5); + EXPECT_EQ(picker.total_weight(), 10); + picker.Resize(2); + EXPECT_EQ(picker.total_weight(), 1); + picker.Resize(1); + EXPECT_EQ(picker.total_weight(), 0); + } +} + +TEST(WeightedPicker, BigWeights) { + PhiloxRandom philox(testing::RandomSeed() + 1, 17); + SimplePhilox rnd(&philox); + VLOG(0) << "======= Check uniform with big weights"; + WeightedPicker picker(2); + picker.SetAllWeights(2147483646L / 3); // (2^31 - 2) / 3 + CheckUniform(&rnd, &picker, 100000); +} + +TEST(WeightedPicker, Deterministic) { + VLOG(0) << "======= Testing deterministic pick"; + static const int32 weights[] = {1, 0, 200, 5, 42}; + TestPickAt(TF_ARRAYSIZE(weights), weights); +} + +TEST(WeightedPicker, Randomized) { + PhiloxRandom philox(testing::RandomSeed() + 10, 17); + SimplePhilox rnd(&philox); + TestPicker(&rnd, 1); + TestPicker(&rnd, 2); + TestPicker(&rnd, 3); + TestPicker(&rnd, 4); + TestPicker(&rnd, 7); + TestPicker(&rnd, 8); + TestPicker(&rnd, 9); + TestPicker(&rnd, 10); + TestPicker(&rnd, 100); +} + +static void TestPicker(SimplePhilox* rnd, int size) { + VLOG(0) << "======= Testing size " << size; + + // Check that empty picker returns -1 + { + WeightedPicker picker(size); + picker.SetAllWeights(0); + for (int i = 0; i < 100; i++) EXPECT_EQ(picker.Pick(rnd), -1); + } + + // Create zero weights array + std::vector<int32> weights(size); + for (int elem = 0; elem < size; elem++) { + weights[elem] = 0; + } + + // Check that singleton picker always returns the same element + for (int elem = 0; elem < size; elem++) { + WeightedPicker picker(size); + picker.SetAllWeights(0); + picker.set_weight(elem, elem + 1); + for (int i = 0; i < 100; i++) EXPECT_EQ(picker.Pick(rnd), elem); + weights[elem] = 10; + picker.SetWeightsFromArray(size, &weights[0]); + for (int i = 0; i < 100; i++) EXPECT_EQ(picker.Pick(rnd), elem); + weights[elem] = 0; + } + + // Check that uniform picker generates elements roughly uniformly + { + WeightedPicker picker(size); + CheckUniform(rnd, &picker, 100000); + } + + // Check uniform picker that was grown piecemeal + if (size / 3 > 0) { + WeightedPicker picker(size / 3); + while (picker.num_elements() != size) { + picker.Append(1); + } + CheckUniform(rnd, &picker, 100000); + } + + // Check that skewed distribution works + if (size <= 10) { + // When picker grows one element at a time + WeightedPicker picker(size); + int32 weight = 1; + for (int elem = 0; elem < size; elem++) { + picker.set_weight(elem, weight); + weights[elem] = weight; + weight *= 2; + } + CheckSkewed(rnd, &picker, 1000000); + + // When picker is created from an array + WeightedPicker array_picker(0); + array_picker.SetWeightsFromArray(size, &weights[0]); + CheckSkewed(rnd, &array_picker, 1000000); + } +} + +static void CheckUniform(SimplePhilox* rnd, WeightedPicker* picker, + int trials) { + const int size = picker->num_elements(); + int* count = new int[size]; + memset(count, 0, sizeof(count[0]) * size); + for (int i = 0; i < size * trials; i++) { + const int elem = picker->Pick(rnd); + EXPECT_GE(elem, 0); + EXPECT_LT(elem, size); + count[elem]++; + } + const int expected_min = int(0.9 * trials); + const int expected_max = int(1.1 * trials); + for (int i = 0; i < size; i++) { + EXPECT_GE(count[i], expected_min); + EXPECT_LE(count[i], expected_max); + } + delete[] count; +} + +static void CheckSkewed(SimplePhilox* rnd, WeightedPicker* picker, int trials) { + const int size = picker->num_elements(); + int* count = new int[size]; + memset(count, 0, sizeof(count[0]) * size); + for (int i = 0; i < size * trials; i++) { + const int elem = picker->Pick(rnd); + EXPECT_GE(elem, 0); + EXPECT_LT(elem, size); + count[elem]++; + } + + for (int i = 0; i < size - 1; i++) { + LOG(INFO) << i << ": " << count[i]; + const float ratio = float(count[i + 1]) / float(count[i]); + EXPECT_GE(ratio, 1.6f); + EXPECT_LE(ratio, 2.4f); + } + delete[] count; +} + +static void TestPickAt(int items, const int32* weights) { + WeightedPicker picker(items); + picker.SetWeightsFromArray(items, weights); + int weight_index = 0; + for (int i = 0; i < items; ++i) { + for (int j = 0; j < weights[i]; ++j) { + int pick = picker.PickAt(weight_index); + EXPECT_EQ(pick, i); + ++weight_index; + } + } + EXPECT_EQ(weight_index, picker.total_weight()); +} + +static void BM_Create(int iters, int arg) { + while (--iters > 0) { + WeightedPicker p(arg); + } +} +BENCHMARK(BM_Create)->Range(1, 1024); + +static void BM_CreateAndSetWeights(int iters, int arg) { + std::vector<int32> weights(arg); + for (int i = 0; i < arg; i++) { + weights[i] = i * 10; + } + while (--iters > 0) { + WeightedPicker p(arg); + p.SetWeightsFromArray(arg, &weights[0]); + } +} +BENCHMARK(BM_CreateAndSetWeights)->Range(1, 1024); + +static void BM_Pick(int iters, int arg) { + PhiloxRandom philox(301, 17); + SimplePhilox rnd(&philox); + WeightedPicker p(arg); + int result = 0; + while (--iters > 0) { + result += p.Pick(&rnd); + } + VLOG(4) << result; // Dummy use +} +BENCHMARK(BM_Pick)->Range(1, 1024); + +} // namespace random +} // namespace tensorflow |