Add tf.random_poisson(shape, lam) to tf core.

Fixes #6798 Change: 146861107
author: A. Unique TensorFlower <gardener@tensorflow.org> 2017-02-07 18:08:21 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-02-07 18:26:46 -0800
commit: 8b07605e45f55c942d1436116fd5b0cc83a29e1d (patch)
tree: 0dbcbd39e4ba834811f10af5bacc805aa56e397f /tensorflow/core/kernels/random_poisson_op.cc
parent: f6e69848335d82158c0af16cb5d36a3220cdc33d (diff)
1 files changed, 357 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/random_poisson_op.cc b/tensorflow/core/kernels/random_poisson_op.cc
new file mode 100644
index 0000000000..553a4a7f93
--- /dev/null
+++ b/tensorflow/core/kernels/random_poisson_op.cc
@@ -0,0 +1,357 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// See docs in ../ops/random_ops.cc.
+
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/kernels/random_poisson_op.h"
+
+#include <algorithm>
+#include <cmath>
+#include <memory>
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/lib/random/random_distributions.h"
+#include "tensorflow/core/lib/random/simple_philox.h"
+#include "tensorflow/core/util/guarded_philox_random.h"
+#include "tensorflow/core/util/work_sharder.h"
+
+#if EIGEN_COMP_GNUC && __cplusplus > 199711L
+#define DISABLE_FLOAT_EQUALITY_WARNING \
+  _Pragma("GCC diagnostic push")       \
+      _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"")
+#define ENABLE_FLOAT_EQUALITY_WARNING _Pragma("GCC diagnostic pop")
+#else
+#define DISABLE_FLOAT_EQUALITY_WARNING
+#define ENABLE_FLOAT_EQUALITY_WARNING
+#endif
+
+#define UNIFORM(X)                                    \
+  if (uniform_remaining == 0) {                       \
+    uniform_remaining = Uniform::kResultElementCount; \
+    uniform_result = uniform(&gen);                   \
+  }                                                   \
+  uniform_remaining--;                                \
+  CT X = uniform_result[uniform_remaining]
+
+namespace tensorflow {
+namespace {
+
+static constexpr int kReservedSamplesPerOutput = 256;
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+// We will compute half-precision Poisson samples with float precision
+// intermediate calculations.
+template <typename T>
+struct PoissonComputeType {
+  typedef T ComputeType;
+};
+
+template <>
+struct PoissonComputeType<Eigen::half> {
+  typedef float ComputeType;
+};
+
+}  // namespace
+
+namespace functor {
+
+template <typename Device, typename T>
+struct PoissonFunctor {
+  void operator()(OpKernelContext* ctx, const Device& d, const T* rate_flat,
+                  int num_rate, int num_samples,
+                  const random::PhiloxRandom& rng, T* samples_flat);
+};
+
+template <typename T>
+struct PoissonFunctor<CPUDevice, T> {
+  void operator()(OpKernelContext* ctx, const CPUDevice& d, const T* rate_flat,
+                  int num_rate, int num_samples,
+                  const random::PhiloxRandom& rng, T* samples_flat) {
+    // Two different algorithms are employed, depending on the size of
+    // rate.
+    // If rate < 10, we use an algorithm attributed to Knuth:
+    // Seminumerical Algorithms. Art of Computer Programming, Volume 2.
+    //
+    // This algorithm runs in O(rate) time, and will require O(rate)
+    // uniform
+    // variates.
+    //
+    // If rate >= 10 we use a transformation-rejection algorithm from
+    // pairs
+    // of uniform random variables due to Hormann.
+    // http://www.sciencedirect.com/science/article/pii/0167668793909974
+    //
+    // The algorithm has an acceptance rate of ~89% for the smallest rate
+    // (~10),
+    // and higher accept rates for higher rate, so runtime is
+    // O(NumRate * NumSamples * k) with k ~ 1 / 0.89.
+    //
+    // We partition work first across rates then across
+    // samples-per-rate to
+    // avoid a couple flops which can be done on a per-rate basis.
+
+    typedef random::UniformDistribution<random::PhiloxRandom, CT> Uniform;
+
+    auto DoWork = [num_samples, num_rate, &rng, samples_flat, rate_flat](
+        int start_output, int limit_output) {
+      // Capturing "rng" by value would only make a copy for the _shared_
+      // lambda.  Since we want to let each worker have its own copy, we pass
+      // "rng" by reference and explicitly do a copy assignment.
+
+      Uniform uniform;
+      typename Uniform::ResultType uniform_result;
+      for (int64 output_idx = start_output; output_idx < limit_output;
+           /* output_idx incremented within inner loop below */) {
+        const int64 rate_idx = output_idx / num_samples;
+
+        // Several calculations can be done on a per-rate basis.
+        const CT rate = CT(rate_flat[rate_idx]);
+
+        auto samples_rate_output = samples_flat + rate_idx;
+
+        if (rate < CT(10)) {
+          // Knuth's algorithm for generating Poisson random variates.
+          // Given a Poisson process, the time between events is exponentially
+          // distributed. If we have a Poisson process with rate lambda, then,
+          // the time between events is distributed Exp(lambda). If X ~
+          // Uniform(0, 1), then Y ~ Exp(lambda), where Y = -log(X) / lambda.
+          // Thus to simulate a Poisson draw, we can draw X_i ~ Exp(lambda),
+          // and N ~ Poisson(lambda), where N is the least number such that
+          // \sum_i^N X_i > 1.
+          const CT exp_neg_rate = Eigen::numext::exp(-rate);
+
+          // Compute the rest of the samples for the current rate value.
+          for (int64 sample_idx = output_idx % num_samples;
+               sample_idx < num_samples && output_idx < limit_output;
+               sample_idx++, output_idx++) {
+            random::PhiloxRandom gen = rng;
+            gen.Skip(kReservedSamplesPerOutput * output_idx);
+            int16 uniform_remaining = 0;
+
+            CT prod = 1;
+            CT x = 0;
+
+            // Keep trying until we surpass e^(-rate). This will take
+            // expected time proportional to rate.
+            while (true) {
+              UNIFORM(u);
+              prod = prod * u;
+              if (prod <= exp_neg_rate) {
+                samples_rate_output[sample_idx * num_rate] = T(x);
+                break;
+              }
+              x += 1;
+            }
+          }
+          continue;
+        }
+        // Transformed rejection due to Hormann.
+        //
+        // Given a CDF F(x), and G(x), a dominating distribution chosen such
+        // that it is close to the inverse CDF F^-1(x), compute the following
+        // steps:
+        //
+        // 1) Generate U and V, two independent random variates. Set U = U - 0.5
+        // (this step isn't strictly necessary, but is done to make some
+        // calculations symmetric and convenient. Henceforth, G is defined on
+        // [-0.5, 0.5]).
+        //
+        // 2) If V <= alpha * F'(G(U)) * G'(U), return floor(G(U)), else return
+        // to step 1. alpha is the acceptance probability of the rejection
+        // algorithm.
+        //
+        // For more details on transformed rejection, see:
+        // http://citeseer.ist.psu.edu/viewdoc/citations;jsessionid=1BEB35946CC807879F55D42512E5490C?doi=10.1.1.48.3054.
+        //
+        // The dominating distribution in this case:
+        //
+        // G(u) = (2 * a / (2 - |u|) + b) * u + c
+
+        using Eigen::numext::log;
+        const CT log_rate = log(rate);
+
+        // Constants used to define the dominating distribution. Names taken
+        // from Hormann's paper. Constants were chosen to define the tightest
+        // G(u) for the inverse Poisson CDF.
+        const CT b = CT(0.931) + CT(2.53) * Eigen::numext::sqrt(rate);
+        const CT a = CT(-0.059) + CT(0.02483) * b;
+
+        // This is the inverse acceptance rate. At a minimum (when rate = 10),
+        // this corresponds to ~75% acceptance. As the rate becomes larger, this
+        // approaches ~89%.
+        const CT inv_alpha = CT(1.1239) + CT(1.1328) / (b - CT(3.4));
+
+        // Compute the rest of the samples for the current rate value.
+        for (int64 sample_idx = output_idx % num_samples;
+             sample_idx < num_samples && output_idx < limit_output;
+             sample_idx++, output_idx++) {
+          random::PhiloxRandom gen = rng;
+          gen.Skip(kReservedSamplesPerOutput * output_idx);
+          int16 uniform_remaining = 0;
+
+          while (true) {
+            UNIFORM(u);
+            u -= CT(0.5);
+            UNIFORM(v);
+
+            CT u_shifted = CT(0.5) - Eigen::numext::abs(u);
+            CT k = Eigen::numext::floor((CT(2) * a / u_shifted + b) * u + rate +
+                                        CT(0.43));
+
+            // When alpha * f(G(U)) * G'(U) is close to 1, it is possible to
+            // find a rectangle (-u_r, u_r) x (0, v_r) under the curve, such
+            // that if v <= v_r and |u| <= u_r, then we can accept.
+            // Here v_r = 0.9227 - 3.6224 / (b - 2) and u_r = 0.43.
+            if (u_shifted >= CT(0.07) &&
+                v <= CT(0.9277) - CT(3.6224) / (b - CT(2))) {
+              samples_rate_output[sample_idx * num_rate] = T(k);
+              break;
+            }
+
+            if (k < 0 || (u_shifted < CT(0.013) && v > u_shifted)) {
+              continue;
+            }
+
+            // The expression below is equivalent to the computation of step 2)
+            // in transformed rejection (v <= alpha * F'(G(u)) * G'(u)).
+            CT s = log(v * inv_alpha / (a / (u_shifted * u_shifted) + b));
+            CT t = -rate + k * log_rate - Eigen::numext::lgamma(k + 1);
+            if (s <= t) {
+              samples_rate_output[sample_idx * num_rate] = T(k);
+              break;
+            }
+          }
+        }
+      }
+    };
+
+    // This will depend on rate.
+    // For rate < 10, on average, O(rate) calls to uniform are
+    // needed, with that
+    // many multiplies. ~10 uniform calls on average with ~25 cost op calls.
+    //
+    // Very roughly, for rate >= 10, the single call to log + call to
+    // lgamma
+    // occur for ~60 percent of samples.
+    // 2 x 100 (64-bit cycles per log) * 0.62 = ~124
+    // Additionally, there are ~10 other ops (+, *, /, ...) at 3-6 cycles each:
+    // 40 * .62  = ~25.
+    //
+    // Finally, there are several other ops that are done every loop along with
+    // 2 uniform generations along with 5 other ops at 3-6 cycles each.
+    // ~15 / .89 = ~16
+    //
+    // In total this should be ~165 + 2 * Uniform::kElementCost.
+    // We assume that half the tensor has rate < 10, so on average 6
+    // uniform's
+    // will be needed. We will upper bound the other op cost by the one for
+    // rate > 10.
+    static const int kElementCost = 165 + 6 * Uniform::kElementCost +
+                                    6 * random::PhiloxRandom::kElementCost;
+    auto worker_threads = *(ctx->device()->tensorflow_cpu_worker_threads());
+    Shard(worker_threads.num_threads, worker_threads.workers,
+          num_rate * num_samples, kElementCost, DoWork);
+  }
+
+ private:
+  typedef typename PoissonComputeType<T>::ComputeType CT;
+};
+
+}  // namespace functor
+
+namespace {
+
+// Samples from one or more Poisson distributions.
+template <typename T>
+class RandomPoissonOp : public OpKernel {
+ public:
+  explicit RandomPoissonOp(OpKernelConstruction* context) : OpKernel(context) {
+    OP_REQUIRES_OK(context, generator_.Init(context));
+  }
+
+  void Compute(OpKernelContext* ctx) override {
+    const Tensor& shape_t = ctx->input(0);
+    const Tensor& rate_t = ctx->input(1);
+
+    OP_REQUIRES(ctx,
+                TensorShapeUtils::IsVector(shape_t.shape()) &&
+                    (shape_t.dtype() == DataType::DT_INT32 ||
+                     shape_t.dtype() == DataType::DT_INT64),
+                errors::InvalidArgument(
+                    "shape must be a vector of {int32,int64}, got shape: ",
+                    shape_t.DebugString()));
+    TensorShape samples_shape;
+    if (shape_t.dtype() == DataType::DT_INT32) {
+      auto vec = shape_t.flat<int32>();
+      OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape(vec.data(), vec.size(),
+                                                      &samples_shape));
+    } else if (shape_t.dtype() == DataType::DT_INT64) {
+      auto vec = shape_t.flat<int64>();
+      OP_REQUIRES_OK(ctx, TensorShapeUtils::MakeShape(vec.data(), vec.size(),
+                                                      &samples_shape));
+    }
+    const int64 num_samples = samples_shape.num_elements();
+    OP_REQUIRES(ctx, num_samples > 0,
+                errors::InvalidArgument(
+                    "Input shape should have non-zero element count, got: ",
+                    num_samples));
+
+    samples_shape.AppendShape(rate_t.shape());
+    // Allocate output samples.
+    Tensor* samples_t = nullptr;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, samples_shape, &samples_t));
+
+    const auto rate_flat = rate_t.flat<T>().data();
+    const int64 num_rate = rate_t.NumElements();
+    OP_REQUIRES(
+        ctx, num_rate > 0,
+        errors::InvalidArgument(
+            "Input rate should have non-zero element count, got: ", num_rate));
+    auto samples_flat = samples_t->flat<T>().data();
+    random::PhiloxRandom rng = generator_.ReserveRandomOutputs(
+        num_samples * num_rate, kReservedSamplesPerOutput);
+
+    functor::PoissonFunctor<CPUDevice, T>()(ctx, ctx->eigen_device<CPUDevice>(),
+                                            rate_flat, num_rate, num_samples,
+                                            rng, samples_flat);
+  }
+
+ private:
+  GuardedPhiloxRandom generator_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(RandomPoissonOp);
+};
+}  // namespace
+
+#undef UNIFORM
+
+#define REGISTER(TYPE)                                                        \
+  REGISTER_KERNEL_BUILDER(                                                    \
+      Name("RandomPoisson").Device(DEVICE_CPU).TypeConstraint<TYPE>("dtype"), \
+      RandomPoissonOp<TYPE>);
+
+TF_CALL_half(REGISTER);
+TF_CALL_float(REGISTER);
+TF_CALL_double(REGISTER);
+
+#undef REGISTER
+
+}  // end namespace tensorflow
author	A. Unique TensorFlower <gardener@tensorflow.org>	2017-02-07 18:08:21 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-02-07 18:26:46 -0800
commit	8b07605e45f55c942d1436116fd5b0cc83a29e1d (patch)
tree	0dbcbd39e4ba834811f10af5bacc805aa56e397f /tensorflow/core/kernels/random_poisson_op.cc
parent	f6e69848335d82158c0af16cb5d36a3220cdc33d (diff)