1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
|
#include <random>
#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/lib/random/philox_random.h"
#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/public/tensor.h"
#include <gtest/gtest.h>
namespace tensorflow {
// Builds a DT_INT32 tensor with an empty (rank-0) shape and stores `v` as its
// only element.
Tensor Int32(int32 v) {
  Tensor scalar_tensor(DT_INT32, TensorShape({}));
  // Grab the scalar accessor first, then write through it.
  auto element = scalar_tensor.scalar<int32>();
  element() = v;
  return scalar_tensor;
}
// Allocates a new Graph and adds a test::graph::RandomUniform node (DT_FLOAT)
// fed by a constant built from `n`. The raw Graph pointer is handed back to
// the caller.
Graph* RandomUniform(int64 n) {
  Graph* graph = new Graph(OpRegistry::Global());
  // Int32() takes an int32, so `n` is narrowed here; the cast only makes the
  // existing conversion visible.
  auto size_input = test::graph::Constant(graph, Int32(static_cast<int32>(n)));
  test::graph::RandomUniform(graph, size_input, DT_FLOAT);
  return graph;
}
// Allocates a new Graph and adds a test::graph::RandomGaussian node (DT_FLOAT)
// fed by a constant built from `n`. The raw Graph pointer is handed back to
// the caller.
Graph* RandomNormal(int64 n) {
  Graph* graph = new Graph(OpRegistry::Global());
  // Int32() takes an int32, so `n` is narrowed here; the cast only makes the
  // existing conversion visible.
  auto size_input = test::graph::Constant(graph, Int32(static_cast<int32>(n)));
  test::graph::RandomGaussian(graph, size_input, DT_FLOAT);
  return graph;
}
// Allocates a new Graph and adds a test::graph::RandomParameters node
// (DT_FLOAT) fed by a constant built from `n`. The raw Graph pointer is handed
// back to the caller.
Graph* RandomParameters(int64 n) {
  Graph* graph = new Graph(OpRegistry::Global());
  // Int32() takes an int32, so `n` is narrowed here; the cast only makes the
  // existing conversion visible.
  auto size_input = test::graph::Constant(graph, Int32(static_cast<int32>(n)));
  test::graph::RandomParameters(graph, size_input, DT_FLOAT);
  return graph;
}
// Defines and registers a benchmark function named BM_<DEV>_<GRAPH_FN>.
// The function reports iters * arg processed items, builds a graph by calling
// GRAPH_FN(arg), and runs it for `iters` iterations on the device whose name
// is the stringified DEV. The benchmark is registered over the argument range
// [1 << 20, 8 << 20].
#define BM_RNG(DEV, GRAPH_FN)                                   \
  static void BM_##DEV##_##GRAPH_FN(int iters, int arg) {       \
    const int64 total_items = static_cast<int64>(iters) * arg;  \
    testing::ItemsProcessed(total_items);                       \
    auto bench_graph = GRAPH_FN(arg);                           \
    test::Benchmark(#DEV, bench_graph).Run(iters);              \
  }                                                             \
  BENCHMARK(BM_##DEV##_##GRAPH_FN)->Range(1 << 20, 8 << 20);
// Instantiate one benchmark per (device, graph builder) pair. Each line
// expands BM_RNG into a function named BM_<device>_<builder> together with
// its BENCHMARK registration.
BM_RNG(cpu, RandomUniform);
BM_RNG(cpu, RandomNormal);
BM_RNG(cpu, RandomParameters);
BM_RNG(gpu, RandomUniform);
BM_RNG(gpu, RandomNormal);
BM_RNG(gpu, RandomParameters);
// Benchmarks raw random::PhiloxRandom generation. Each of the `iters`
// benchmark iterations draws `count` (2M) values, four per gen() call, and
// folds them into an accumulator so the generation work stays observable.
static void BM_PhiloxRandom(int iters) {
  // Fill 2M random numbers
  const int count = 2 << 20;

  testing::ItemsProcessed(static_cast<int64>(iters) * count);

  random::PhiloxRandom gen(0x12345);

  // Unsigned accumulator: XOR-ing the samples into a signed int would rely on
  // an implementation-defined unsigned-to-signed conversion.
  unsigned int val = 1;
  for (int i = 0; i < iters; ++i) {
    for (int j = 0; j < count; j += 4) {
      /// each invocation of gen() returns 128-bit samples
      auto samples = gen();

      // use the result trivially so the compiler does not optimize it away
      val ^= samples[0] ^ samples[1] ^ samples[2] ^ samples[3];
    }
  }

  // An anchor point to make sure the compiler does not cut corners. A volatile
  // store is used instead of CHECK(val): the latter would abort the benchmark
  // whenever the accumulated XOR happened to be zero.
  volatile unsigned int sink = val;
  (void)sink;
}
BENCHMARK(BM_PhiloxRandom);
static void BM_StdMTRandom(int iters) {
// Fill 2M random numbers
int count = 2 << 20;
testing::ItemsProcessed(static_cast<int64>(iters) * count);
std::mt19937 gen(0x12345);
int val = 1;
for (int i = 0; i < iters; ++i) {
for (int j = 0; j < count; ++j) {
/// each invocation of gen() returns 32-bit sample
uint32 sample = gen();
// use the result trivially so the compiler does not optimize it away
val ^= sample;
}
}
// A anchor point to make sure the compiler does not cut corners
CHECK(val) << val;
}
BENCHMARK(BM_StdMTRandom);
} // end namespace tensorflow
|