about summary refs log tree commit diff homepage
path: root/tensorflow/core/kernels/reduction_ops_test.cc
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org> 2017-08-20 22:25:23 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-08-20 22:29:05 -0700
commit ec0e1e580c1eb46afd5a81af8f925d8813e7ab50 (patch)
tree 4fdf8397f5eb7166e351e9fe2b8d4f79dd4bb452 /tensorflow/core/kernels/reduction_ops_test.cc
parent badd5456977e2b981a08cd5d6e41a292ea6eafda (diff)
Automated g4 rollback of changelist 165773305
PiperOrigin-RevId: 165887626
Diffstat (limited to 'tensorflow/core/kernels/reduction_ops_test.cc')
-rw-r--r-- tensorflow/core/kernels/reduction_ops_test.cc | 163
1 file changed, 34 insertions, 129 deletions
diff --git a/tensorflow/core/kernels/reduction_ops_test.cc b/tensorflow/core/kernels/reduction_ops_test.cc
index 9bbe993a2f..9cdebdd4f2 100644
--- a/tensorflow/core/kernels/reduction_ops_test.cc
+++ b/tensorflow/core/kernels/reduction_ops_test.cc
@@ -15,7 +15,6 @@ limitations under the License.
#include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h"
#include "tensorflow/core/framework/tensor.h"
-#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/test_benchmark.h"
@@ -23,59 +22,14 @@ namespace tensorflow {
// Creates a Graph which "reduce"s a 3D float tensor of "num" elements
// into a scalar.
-template <typename T>
-static Graph* ToScalar(const string& reduce, int num_x, int num_y) {
- auto* g = new Graph(OpRegistry::Global());
- Tensor data(DataTypeToEnum<T>::value, TensorShape({num_x, num_y}));
- data.flat<T>().setRandom();
- Tensor axes(DT_INT32, TensorShape({2}));
- axes.flat<int32>()(0) = 0;
- axes.flat<int32>()(1) = 1;
- test::graph::Reduce(g, reduce, test::graph::Constant(g, data),
- test::graph::Constant(g, axes));
- return g;
-}
-
-static Graph* ColReduce(const string& reduce, int num_x, int num_y) {
- auto* g = new Graph(OpRegistry::Global());
- Tensor data(DT_FLOAT, TensorShape({num_x, num_y}));
+static Graph* ToScalar(const string& reduce, int num) {
+ Graph* g = new Graph(OpRegistry::Global());
+ Tensor data(DT_FLOAT, TensorShape({64, 64, num / (64 * 64)}));
data.flat<float>().setRandom();
- Tensor axes(DT_INT32, TensorShape({1}));
+ Tensor axes(DT_INT32, TensorShape({3}));
axes.flat<int32>()(0) = 0;
- test::graph::Reduce(g, reduce, test::graph::Constant(g, data),
- test::graph::Constant(g, axes));
- return g;
-}
-
-static Graph* RowReduce(const string& reduce, int num_x, int num_y) {
- auto* g = new Graph(OpRegistry::Global());
- Tensor data(DT_FLOAT, TensorShape({num_x, num_y}));
- data.flat<float>().setRandom();
- Tensor axes(DT_INT32, TensorShape({1}));
- axes.flat<int32>()(0) = 1;
- test::graph::Reduce(g, reduce, test::graph::Constant(g, data),
- test::graph::Constant(g, axes));
- return g;
-}
-
-static Graph* ThreeDYReduce(const string& reduce, int num_y, int num_z) {
- auto* g = new Graph(OpRegistry::Global());
- Tensor data(DT_FLOAT, TensorShape({4, num_y, num_z}));
- data.flat<float>().setRandom();
- Tensor axes(DT_INT32, TensorShape({1}));
- axes.flat<int32>()(0) = 1;
- test::graph::Reduce(g, reduce, test::graph::Constant(g, data),
- test::graph::Constant(g, axes));
- return g;
-}
-
-static Graph* ThreeDXZReduce(const string& reduce, int num_y, int num_z) {
- auto* g = new Graph(OpRegistry::Global());
- Tensor data(DT_FLOAT, TensorShape({4, num_y, num_z}));
- data.flat<float>().setRandom();
- Tensor axes(DT_INT32, TensorShape({2}));
- axes.flat<int32>()(0) = 0;
- axes.flat<int32>()(1) = 2;
+ axes.flat<int32>()(1) = 1;
+ axes.flat<int32>()(2) = 2;
test::graph::Reduce(g, reduce, test::graph::Constant(g, data),
test::graph::Constant(g, axes));
return g;
@@ -83,100 +37,51 @@ static Graph* ThreeDXZReduce(const string& reduce, int num_y, int num_z) {
// Creates a bench which reduces a 3D tensor with total "num" floats
// into a scalar on a "device". Runs the bench for "iters" times.
-template <typename T>
static void ReduceToScalar(int iters, const string& device,
- const string& reduce, int num_x, int num_y) {
- testing::ItemsProcessed(static_cast<int64>(iters) * num_x * num_y);
- testing::BytesProcessed(static_cast<int64>(iters) * num_x * num_y *
- sizeof(T));
- test::Benchmark(device, ToScalar<T>(reduce, num_x, num_y)).Run(iters);
-}
-
-static void DoRowReduce(int iters, const string& device, const string& reduce,
- int num_x, int num_y) {
- testing::ItemsProcessed(static_cast<int64>(iters) * num_x * num_y);
- testing::BytesProcessed(static_cast<int64>(iters) * num_x * num_y *
- sizeof(float));
- test::Benchmark(device, RowReduce(reduce, num_x, num_y)).Run(iters);
-}
-
-static void DoColReduce(int iters, const string& device, const string& reduce,
- int num_x, int num_y) {
- testing::ItemsProcessed(static_cast<int64>(iters) * num_x * num_y);
- testing::BytesProcessed(static_cast<int64>(iters) * num_x * num_y *
- sizeof(float));
- test::Benchmark(device, ColReduce(reduce, num_x, num_y)).Run(iters);
-}
-
-static void Do3DYReduce(int iters, const string& device, const string& reduce,
- int num_x, int num_y) {
- testing::ItemsProcessed(static_cast<int64>(iters) * num_x * num_y);
- testing::BytesProcessed(static_cast<int64>(iters) * num_x * num_y *
- sizeof(float));
- test::Benchmark(device, ThreeDYReduce(reduce, num_x, num_y)).Run(iters);
-}
-
-static void Do3DXZReduce(int iters, const string& device, const string& reduce,
- int num_x, int num_y) {
- testing::ItemsProcessed(static_cast<int64>(iters) * num_x * num_y);
- testing::BytesProcessed(static_cast<int64>(iters) * num_x * num_y *
- sizeof(float));
- test::Benchmark(device, ThreeDXZReduce(reduce, num_x, num_y)).Run(iters);
-}
-
-static void BM_Sum2DToScalarGPU(int iters, int num_x, int num_y) {
- ReduceToScalar<float>(iters, "gpu", "Sum", num_x, num_y);
-}
-BENCHMARK(BM_Sum2DToScalarGPU)->RangePair(1, 8192, 1, 8192);
-
-static void BM_Sum2DToScalarGPUComplex(int iters, int num_x, int num_y) {
- ReduceToScalar<std::complex<float>>(iters, "gpu", "Sum", num_x, num_y);
-}
-BENCHMARK(BM_Sum2DToScalarGPUComplex)->RangePair(1, 8192, 1, 8192);
-
-static void BM_Sum2DToScalarGPUHalf(int iters, int num_x, int num_y) {
- ReduceToScalar<Eigen::half>(iters, "gpu", "Sum", num_x, num_y);
+ const string& reduce, int num) {
+ testing::ItemsProcessed(static_cast<int64>(iters) * num);
+ testing::BytesProcessed(static_cast<int64>(iters) * num * sizeof(float));
+ test::Benchmark(device, ToScalar(reduce, num)).Run(iters);
}
-BENCHMARK(BM_Sum2DToScalarGPUHalf)->RangePair(1, 8192, 1, 8192);
-static void BM_Sum2DRowReduceGPU(int iters, int num_x, int num_y) {
- DoRowReduce(iters, "gpu", "Sum", num_x, num_y);
+static void BM_Sum3DToScalarCPU(int iters, int num) {
+ ReduceToScalar(iters, "cpu", "Sum", num);
}
-BENCHMARK(BM_Sum2DRowReduceGPU)->RangePair(1, 8192, 1, 8192);
+BENCHMARK(BM_Sum3DToScalarCPU)->Range(1 << 13, 1 << 20);
-static void BM_Sum2DColumnReduceGPU(int iters, int num_x, int num_y) {
- DoColReduce(iters, "gpu", "Sum", num_x, num_y);
+static void BM_Max3DToScalarCPU(int iters, int num) {
+ ReduceToScalar(iters, "cpu", "Max", num);
}
-BENCHMARK(BM_Sum2DColumnReduceGPU)->RangePair(1, 8192, 1, 8192);
+BENCHMARK(BM_Max3DToScalarCPU)->Range(1 << 13, 1 << 20);
-static void BM_Sum3DYReduceGPU(int iters, int num_x, int num_y) {
- Do3DYReduce(iters, "gpu", "Sum", num_x, num_y);
+static void BM_Prod3DToScalarCPU(int iters, int num) {
+ ReduceToScalar(iters, "cpu", "Prod", num);
}
-BENCHMARK(BM_Sum3DYReduceGPU)->RangePair(64, 4096, 64, 4096);
+BENCHMARK(BM_Prod3DToScalarCPU)->Range(1 << 13, 1 << 20);
-static void BM_Sum3DXZReduceGPU(int iters, int num_x, int num_y) {
- Do3DXZReduce(iters, "gpu", "Sum", num_x, num_y);
+static void BM_Mean3DToScalarCPU(int iters, int num) {
+ ReduceToScalar(iters, "cpu", "Mean", num);
}
-BENCHMARK(BM_Sum3DXZReduceGPU)->RangePair(64, 4096, 64, 4096);
+BENCHMARK(BM_Mean3DToScalarCPU)->Range(1 << 13, 1 << 20);
-static void BM_Mean2DToScalarGPU(int iters, int num_x, int num_y) {
- ReduceToScalar<float>(iters, "gpu", "Mean", num_x, num_y);
+static void BM_Sum3DToScalarGPU(int iters, int num) {
+ ReduceToScalar(iters, "gpu", "Sum", num);
}
-BENCHMARK(BM_Mean2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192);
+BENCHMARK(BM_Sum3DToScalarGPU)->Range(1 << 13, 1 << 20);
-static void BM_Max2DToScalarGPU(int iters, int num_x, int num_y) {
- ReduceToScalar<float>(iters, "gpu", "Max", num_x, num_y);
+static void BM_Max3DToScalarGPU(int iters, int num) {
+ ReduceToScalar(iters, "gpu", "Max", num);
}
-BENCHMARK(BM_Max2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192);
+BENCHMARK(BM_Max3DToScalarGPU)->Range(1 << 13, 1 << 20);
-static void BM_Min2DToScalarGPU(int iters, int num_x, int num_y) {
- ReduceToScalar<float>(iters, "gpu", "Min", num_x, num_y);
+static void BM_Prod3DToScalarGPU(int iters, int num) {
+ ReduceToScalar(iters, "gpu", "Prod", num);
}
-BENCHMARK(BM_Min2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192);
+BENCHMARK(BM_Prod3DToScalarGPU)->Range(1 << 13, 1 << 20);
-static void BM_Bool2DToScalarGPU(int iters, int num_x, int num_y) {
- ReduceToScalar<bool>(iters, "gpu", "All", num_x, num_y);
+static void BM_Mean3DToScalarGPU(int iters, int num) {
+ ReduceToScalar(iters, "gpu", "Mean", num);
}
-BENCHMARK(BM_Bool2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192);
+BENCHMARK(BM_Mean3DToScalarGPU)->Range(1 << 13, 1 << 20);
} // end namespace tensorflow