aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/kernels/random_op_test.cc
blob: 751b61cfba8bc21d3319668ef6ddacf131ee7b4a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#include <random>

#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/lib/random/philox_random.h"
#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/public/tensor.h"
#include <gtest/gtest.h>

namespace tensorflow {

// Returns a DT_INT32 tensor of shape {} (a scalar) whose single element is
// set to `v`.
Tensor Int32(int32 v) {
  Tensor result(DT_INT32, TensorShape({}));
  // scalar<int32>() exposes the lone element; the trailing () dereferences it.
  result.scalar<int32>()() = v;
  return result;
}

// Builds a new graph holding a DT_FLOAT test::graph::RandomUniform node whose
// input is a scalar int32 constant carrying `n`.
//
// `n` is narrowed to int32 because Int32() takes an int32.
// Returns a heap-allocated Graph.
// NOTE(review): ownership presumably passes to test::Benchmark — confirm.
Graph* RandomUniform(int64 n) {
  Graph* graph = new Graph(OpRegistry::Global());
  auto size_node = test::graph::Constant(graph, Int32(static_cast<int32>(n)));
  test::graph::RandomUniform(graph, size_node, DT_FLOAT);
  return graph;
}

// Builds a new graph holding a DT_FLOAT test::graph::RandomGaussian node whose
// input is a scalar int32 constant carrying `n`.
//
// `n` is narrowed to int32 because Int32() takes an int32.
// Returns a heap-allocated Graph.
// NOTE(review): ownership presumably passes to test::Benchmark — confirm.
Graph* RandomNormal(int64 n) {
  Graph* graph = new Graph(OpRegistry::Global());
  auto size_node = test::graph::Constant(graph, Int32(static_cast<int32>(n)));
  test::graph::RandomGaussian(graph, size_node, DT_FLOAT);
  return graph;
}

// Builds a new graph holding a DT_FLOAT test::graph::RandomParameters node
// whose input is a scalar int32 constant carrying `n`.
//
// `n` is narrowed to int32 because Int32() takes an int32.
// Returns a heap-allocated Graph.
// NOTE(review): ownership presumably passes to test::Benchmark — confirm.
Graph* RandomParameters(int64 n) {
  Graph* graph = new Graph(OpRegistry::Global());
  auto size_node = test::graph::Constant(graph, Int32(static_cast<int32>(n)));
  test::graph::RandomParameters(graph, size_node, DT_FLOAT);
  return graph;
}

// Defines and registers a benchmark function named BM_<DEVICE>_<RNG>.
//
//   DEVICE: device token; its stringified form (#DEVICE) is handed to
//           test::Benchmark together with the graph.
//   RNG:    name of a function callable as RNG(arg) whose result is accepted
//           by test::Benchmark (the Graph* builders in this file).
//
// The generated function receives the iteration count `iters` and the
// benchmark argument `arg`; it reports iters * arg items processed (computed
// in int64 so the product does not overflow int) and runs the graph built by
// RNG(arg) for `iters` iterations. Range(1 << 20, 8 << 20) bounds the values
// of `arg` between 1M and 8M.
//
// The expansion deliberately does not end in ';': the use site supplies it
// (BM_RNG(cpu, Foo);), which avoids a stray empty declaration per expansion.
#define BM_RNG(DEVICE, RNG)                                   \
  static void BM_##DEVICE##_##RNG(int iters, int arg) {       \
    testing::ItemsProcessed(static_cast<int64>(iters) * arg); \
    test::Benchmark(#DEVICE, RNG(arg)).Run(iters);            \
  }                                                           \
  BENCHMARK(BM_##DEVICE##_##RNG)->Range(1 << 20, 8 << 20)

// Benchmarks with "cpu" as the device string: one per graph builder defined
// above (BM_cpu_RandomUniform, BM_cpu_RandomNormal, BM_cpu_RandomParameters).
BM_RNG(cpu, RandomUniform);
BM_RNG(cpu, RandomNormal);
BM_RNG(cpu, RandomParameters);

// The same three benchmarks with "gpu" as the device string.
// NOTE(review): presumably these require a GPU-enabled build/device — confirm.
BM_RNG(gpu, RandomUniform);
BM_RNG(gpu, RandomNormal);
BM_RNG(gpu, RandomParameters);

static void BM_PhiloxRandom(int iters) {
  // Fill 2M random numbers
  int count = 2 << 20;

  testing::ItemsProcessed(static_cast<int64>(iters) * count);

  random::PhiloxRandom gen(0x12345);

  int val = 1;
  for (int i = 0; i < iters; ++i) {
    for (int j = 0; j < count; j += 4) {
      /// each invocation of gen() returns 128-bit samples
      auto samples = gen();

      // use the result trivially so the compiler does not optimize it away
      val ^= samples[0] ^ samples[1] ^ samples[2] ^ samples[3];
    }
  }

  // A anchor point to make sure the compiler does not cut corners
  CHECK(val) << val;
}
BENCHMARK(BM_PhiloxRandom);

static void BM_StdMTRandom(int iters) {
  // Fill 2M random numbers
  int count = 2 << 20;

  testing::ItemsProcessed(static_cast<int64>(iters) * count);

  std::mt19937 gen(0x12345);

  int val = 1;
  for (int i = 0; i < iters; ++i) {
    for (int j = 0; j < count; ++j) {
      /// each invocation of gen() returns 32-bit sample
      uint32 sample = gen();

      // use the result trivially so the compiler does not optimize it away
      val ^= sample;
    }
  }

  // A anchor point to make sure the compiler does not cut corners
  CHECK(val) << val;
}
BENCHMARK(BM_StdMTRandom);

}  // end namespace tensorflow