1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
|
#include <functional>
#include <memory>
#include <vector>
#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/graph/testlib.h"
#include "tensorflow/core/graph/node_builder.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/kernels/ops_util.h"
#include "tensorflow/core/platform/test_benchmark.h"
#include "tensorflow/core/public/tensor.h"
#include <gtest/gtest.h>
#include "tensorflow/core/lib/core/status_test_util.h"
namespace tensorflow {
namespace {
// Benchmark helper for the "Slice" op. Builds a graph with a single Slice node
// that extracts a kDim x 'size' window, starting at (kOffset, kOffset), from a
// randomly filled (2 * kDim) x kMaxSize input tensor of element type T, and
// runs that graph 'iters' times on "cpu".
//
// 'iters' is the number of benchmark iterations. 'size' is the extent of the
// slice along the second dimension; it must fit in what remains of that
// dimension after the offset, i.e. size <= kMaxSize - kOffset.
template <typename T>
static void SliceHelper(int iters, int size) {
  // Keep graph construction out of the timed region.
  testing::StopTiming();
  RequireDefaultOps();
  // NOTE(review): presumably test::Benchmark takes ownership of 'g' -- verify;
  // nothing in this function deletes it.
  Graph* g = new Graph(OpRegistry::Global());
  DataType dt = DataTypeToEnum<T>::v();
  const int kDim = 100;
  const int kMaxSize = 15000;
  const int kOffset = 10;
  // The slice starts kOffset elements into each dimension, so the requested
  // size has to fit in the remaining kMaxSize - kOffset elements. Checking
  // against kMaxSize alone would let through sizes that overrun the input.
  CHECK_LT(size, kMaxSize - kOffset + 1);

  // Start coordinates of the slice.
  Tensor begin(DT_INT32, TensorShape({2}));
  begin.flat<int32>()(0) = kOffset;
  begin.flat<int32>()(1) = kOffset;

  // Extent of the slice in each dimension.
  Tensor sizes(DT_INT32, TensorShape({2}));
  sizes.flat<int32>()(0) = kDim;
  sizes.flat<int32>()(1) = size;

  Tensor input(dt, TensorShape({2 * kDim, kMaxSize}));
  input.flat<T>().setRandom();

  Node* node;
  TF_CHECK_OK(NodeBuilder(g->NewName("n"), "Slice")
                  .Input(test::graph::Constant(g, input))
                  .Input(test::graph::Constant(g, begin))
                  .Input(test::graph::Constant(g, sizes))
                  .Attr("T", dt)
                  .Finalize(g, &node));

  // Each iteration produces a kDim x size output of T.
  testing::BytesProcessed(static_cast<int64>(iters) * kDim * size * sizeof(T));
  testing::StartTiming();
  test::Benchmark("cpu", g).Run(iters);
  // NOTE(review): UseRealTime() is called after the run has finished; confirm
  // whether it is meant to precede Run().
  testing::UseRealTime();
}
static void BM_SliceFloat(int iters, int dim2) {
SliceHelper<float>(iters, dim2);
}
BENCHMARK(BM_SliceFloat)->Arg(100)->Arg(1000)->Arg(10000);
static void BM_SliceBFloat16(int iters, int dim2) {
SliceHelper<bfloat16>(iters, dim2);
}
BENCHMARK(BM_SliceBFloat16)->Arg(100)->Arg(1000)->Arg(10000);
} // namespace
} // namespace tensorflow
|