diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2017-08-20 22:25:23 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-08-20 22:29:05 -0700 |
commit | ec0e1e580c1eb46afd5a81af8f925d8813e7ab50 (patch) | |
tree | 4fdf8397f5eb7166e351e9fe2b8d4f79dd4bb452 /tensorflow/core/kernels/reduction_ops_test.cc | |
parent | badd5456977e2b981a08cd5d6e41a292ea6eafda (diff) |
Automated g4 rollback of changelist 165773305
PiperOrigin-RevId: 165887626
Diffstat (limited to 'tensorflow/core/kernels/reduction_ops_test.cc')
-rw-r--r-- | tensorflow/core/kernels/reduction_ops_test.cc | 163 |
1 file changed, 34 insertions, 129 deletions
diff --git a/tensorflow/core/kernels/reduction_ops_test.cc b/tensorflow/core/kernels/reduction_ops_test.cc index 9bbe993a2f..9cdebdd4f2 100644 --- a/tensorflow/core/kernels/reduction_ops_test.cc +++ b/tensorflow/core/kernels/reduction_ops_test.cc @@ -15,7 +15,6 @@ limitations under the License. #include "tensorflow/core/common_runtime/kernel_benchmark_testlib.h" #include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/types.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/platform/test_benchmark.h" @@ -23,59 +22,14 @@ namespace tensorflow { // Creates a Graph which "reduce"s a 3D float tensor of "num" elements // into a scalar. -template <typename T> -static Graph* ToScalar(const string& reduce, int num_x, int num_y) { - auto* g = new Graph(OpRegistry::Global()); - Tensor data(DataTypeToEnum<T>::value, TensorShape({num_x, num_y})); - data.flat<T>().setRandom(); - Tensor axes(DT_INT32, TensorShape({2})); - axes.flat<int32>()(0) = 0; - axes.flat<int32>()(1) = 1; - test::graph::Reduce(g, reduce, test::graph::Constant(g, data), - test::graph::Constant(g, axes)); - return g; -} - -static Graph* ColReduce(const string& reduce, int num_x, int num_y) { - auto* g = new Graph(OpRegistry::Global()); - Tensor data(DT_FLOAT, TensorShape({num_x, num_y})); +static Graph* ToScalar(const string& reduce, int num) { + Graph* g = new Graph(OpRegistry::Global()); + Tensor data(DT_FLOAT, TensorShape({64, 64, num / (64 * 64)})); data.flat<float>().setRandom(); - Tensor axes(DT_INT32, TensorShape({1})); + Tensor axes(DT_INT32, TensorShape({3})); axes.flat<int32>()(0) = 0; - test::graph::Reduce(g, reduce, test::graph::Constant(g, data), - test::graph::Constant(g, axes)); - return g; -} - -static Graph* RowReduce(const string& reduce, int num_x, int num_y) { - auto* g = new Graph(OpRegistry::Global()); - Tensor data(DT_FLOAT, TensorShape({num_x, num_y})); - data.flat<float>().setRandom(); - Tensor axes(DT_INT32, TensorShape({1})); - 
axes.flat<int32>()(0) = 1; - test::graph::Reduce(g, reduce, test::graph::Constant(g, data), - test::graph::Constant(g, axes)); - return g; -} - -static Graph* ThreeDYReduce(const string& reduce, int num_y, int num_z) { - auto* g = new Graph(OpRegistry::Global()); - Tensor data(DT_FLOAT, TensorShape({4, num_y, num_z})); - data.flat<float>().setRandom(); - Tensor axes(DT_INT32, TensorShape({1})); - axes.flat<int32>()(0) = 1; - test::graph::Reduce(g, reduce, test::graph::Constant(g, data), - test::graph::Constant(g, axes)); - return g; -} - -static Graph* ThreeDXZReduce(const string& reduce, int num_y, int num_z) { - auto* g = new Graph(OpRegistry::Global()); - Tensor data(DT_FLOAT, TensorShape({4, num_y, num_z})); - data.flat<float>().setRandom(); - Tensor axes(DT_INT32, TensorShape({2})); - axes.flat<int32>()(0) = 0; - axes.flat<int32>()(1) = 2; + axes.flat<int32>()(1) = 1; + axes.flat<int32>()(2) = 2; test::graph::Reduce(g, reduce, test::graph::Constant(g, data), test::graph::Constant(g, axes)); return g; @@ -83,100 +37,51 @@ static Graph* ThreeDXZReduce(const string& reduce, int num_y, int num_z) { // Creates a bench which reduces a 3D tensor with total "num" floats // into a scalar on a "device". Runs the bench for "iters" times. 
-template <typename T> static void ReduceToScalar(int iters, const string& device, - const string& reduce, int num_x, int num_y) { - testing::ItemsProcessed(static_cast<int64>(iters) * num_x * num_y); - testing::BytesProcessed(static_cast<int64>(iters) * num_x * num_y * - sizeof(T)); - test::Benchmark(device, ToScalar<T>(reduce, num_x, num_y)).Run(iters); -} - -static void DoRowReduce(int iters, const string& device, const string& reduce, - int num_x, int num_y) { - testing::ItemsProcessed(static_cast<int64>(iters) * num_x * num_y); - testing::BytesProcessed(static_cast<int64>(iters) * num_x * num_y * - sizeof(float)); - test::Benchmark(device, RowReduce(reduce, num_x, num_y)).Run(iters); -} - -static void DoColReduce(int iters, const string& device, const string& reduce, - int num_x, int num_y) { - testing::ItemsProcessed(static_cast<int64>(iters) * num_x * num_y); - testing::BytesProcessed(static_cast<int64>(iters) * num_x * num_y * - sizeof(float)); - test::Benchmark(device, ColReduce(reduce, num_x, num_y)).Run(iters); -} - -static void Do3DYReduce(int iters, const string& device, const string& reduce, - int num_x, int num_y) { - testing::ItemsProcessed(static_cast<int64>(iters) * num_x * num_y); - testing::BytesProcessed(static_cast<int64>(iters) * num_x * num_y * - sizeof(float)); - test::Benchmark(device, ThreeDYReduce(reduce, num_x, num_y)).Run(iters); -} - -static void Do3DXZReduce(int iters, const string& device, const string& reduce, - int num_x, int num_y) { - testing::ItemsProcessed(static_cast<int64>(iters) * num_x * num_y); - testing::BytesProcessed(static_cast<int64>(iters) * num_x * num_y * - sizeof(float)); - test::Benchmark(device, ThreeDXZReduce(reduce, num_x, num_y)).Run(iters); -} - -static void BM_Sum2DToScalarGPU(int iters, int num_x, int num_y) { - ReduceToScalar<float>(iters, "gpu", "Sum", num_x, num_y); -} -BENCHMARK(BM_Sum2DToScalarGPU)->RangePair(1, 8192, 1, 8192); - -static void BM_Sum2DToScalarGPUComplex(int iters, int num_x, int 
num_y) { - ReduceToScalar<std::complex<float>>(iters, "gpu", "Sum", num_x, num_y); -} -BENCHMARK(BM_Sum2DToScalarGPUComplex)->RangePair(1, 8192, 1, 8192); - -static void BM_Sum2DToScalarGPUHalf(int iters, int num_x, int num_y) { - ReduceToScalar<Eigen::half>(iters, "gpu", "Sum", num_x, num_y); + const string& reduce, int num) { + testing::ItemsProcessed(static_cast<int64>(iters) * num); + testing::BytesProcessed(static_cast<int64>(iters) * num * sizeof(float)); + test::Benchmark(device, ToScalar(reduce, num)).Run(iters); } -BENCHMARK(BM_Sum2DToScalarGPUHalf)->RangePair(1, 8192, 1, 8192); -static void BM_Sum2DRowReduceGPU(int iters, int num_x, int num_y) { - DoRowReduce(iters, "gpu", "Sum", num_x, num_y); +static void BM_Sum3DToScalarCPU(int iters, int num) { + ReduceToScalar(iters, "cpu", "Sum", num); } -BENCHMARK(BM_Sum2DRowReduceGPU)->RangePair(1, 8192, 1, 8192); +BENCHMARK(BM_Sum3DToScalarCPU)->Range(1 << 13, 1 << 20); -static void BM_Sum2DColumnReduceGPU(int iters, int num_x, int num_y) { - DoColReduce(iters, "gpu", "Sum", num_x, num_y); +static void BM_Max3DToScalarCPU(int iters, int num) { + ReduceToScalar(iters, "cpu", "Max", num); } -BENCHMARK(BM_Sum2DColumnReduceGPU)->RangePair(1, 8192, 1, 8192); +BENCHMARK(BM_Max3DToScalarCPU)->Range(1 << 13, 1 << 20); -static void BM_Sum3DYReduceGPU(int iters, int num_x, int num_y) { - Do3DYReduce(iters, "gpu", "Sum", num_x, num_y); +static void BM_Prod3DToScalarCPU(int iters, int num) { + ReduceToScalar(iters, "cpu", "Prod", num); } -BENCHMARK(BM_Sum3DYReduceGPU)->RangePair(64, 4096, 64, 4096); +BENCHMARK(BM_Prod3DToScalarCPU)->Range(1 << 13, 1 << 20); -static void BM_Sum3DXZReduceGPU(int iters, int num_x, int num_y) { - Do3DXZReduce(iters, "gpu", "Sum", num_x, num_y); +static void BM_Mean3DToScalarCPU(int iters, int num) { + ReduceToScalar(iters, "cpu", "Mean", num); } -BENCHMARK(BM_Sum3DXZReduceGPU)->RangePair(64, 4096, 64, 4096); +BENCHMARK(BM_Mean3DToScalarCPU)->Range(1 << 13, 1 << 20); -static void 
BM_Mean2DToScalarGPU(int iters, int num_x, int num_y) { - ReduceToScalar<float>(iters, "gpu", "Mean", num_x, num_y); +static void BM_Sum3DToScalarGPU(int iters, int num) { + ReduceToScalar(iters, "gpu", "Sum", num); } -BENCHMARK(BM_Mean2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192); +BENCHMARK(BM_Sum3DToScalarGPU)->Range(1 << 13, 1 << 20); -static void BM_Max2DToScalarGPU(int iters, int num_x, int num_y) { - ReduceToScalar<float>(iters, "gpu", "Max", num_x, num_y); +static void BM_Max3DToScalarGPU(int iters, int num) { + ReduceToScalar(iters, "gpu", "Max", num); } -BENCHMARK(BM_Max2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192); +BENCHMARK(BM_Max3DToScalarGPU)->Range(1 << 13, 1 << 20); -static void BM_Min2DToScalarGPU(int iters, int num_x, int num_y) { - ReduceToScalar<float>(iters, "gpu", "Min", num_x, num_y); +static void BM_Prod3DToScalarGPU(int iters, int num) { + ReduceToScalar(iters, "gpu", "Prod", num); } -BENCHMARK(BM_Min2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192); +BENCHMARK(BM_Prod3DToScalarGPU)->Range(1 << 13, 1 << 20); -static void BM_Bool2DToScalarGPU(int iters, int num_x, int num_y) { - ReduceToScalar<bool>(iters, "gpu", "All", num_x, num_y); +static void BM_Mean3DToScalarGPU(int iters, int num) { + ReduceToScalar(iters, "gpu", "Mean", num); } -BENCHMARK(BM_Bool2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192); +BENCHMARK(BM_Mean3DToScalarGPU)->Range(1 << 13, 1 << 20); } // end namespace tensorflow |