Added benchmarks for full reduction

author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-02-29 14:57:52 -0800
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-02-29 14:57:52 -0800
commit: 56a3ada6701b8e8645df4e00a2ef93d45a4f970a (patch)
tree: 2d451fcd6b51461fea1f04543744c8054e8f0f48 /bench
parent: b2075cb7a2d321a11f2c9b96877eaf2d49dc1b25 (diff)
2 files changed, 25 insertions, 2 deletions
diff --git a/bench/tensors/tensor_benchmarks.h b/bench/tensors/tensor_benchmarks.h
index 131d056b4..d916f787e 100644
--- a/bench/tensors/tensor_benchmarks.h
+++ b/bench/tensors/tensor_benchmarks.h
@@ -297,7 +297,8 @@ template <typename Device, typename T> class BenchmarkSuite {
     input_size[0] = k_;
     input_size[1] = n_;
     const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size);
-    const Eigen::array<TensorIndex, 1> output_size = {{n_}};
+    Eigen::array<TensorIndex, 1> output_size;
+    output_size[0] = n_;
     TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size);
 
 #ifndef EIGEN_HAS_INDEX_LIST
@@ -325,7 +326,8 @@ template <typename Device, typename T> class BenchmarkSuite {
     input_size[1] = n_;
     const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
         b_, input_size);
-    const Eigen::array<TensorIndex, 1> output_size = {{k_}};
+    Eigen::array<TensorIndex, 1> output_size;
+    output_size[0] = k_;
     TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C(
         c_, output_size);
 
@@ -347,6 +349,26 @@ template <typename Device, typename T> class BenchmarkSuite {
     finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
   }
 
+  // Full reduction: sums every value of the k_ x n_ input into a rank-0 output
+  void fullReduction(int num_iters) {
+    Eigen::array<TensorIndex, 2> input_size;
+    input_size[0] = k_;
+    input_size[1] = n_;
+    const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
+        b_, input_size);
+    Eigen::array<TensorIndex, 0> output_size;  // rank 0: no extents to set
+    TensorMap<Tensor<T, 0, 0, TensorIndex>, Eigen::Aligned> C(  // T, as for B
+        c_, output_size);
+
+    StartBenchmarkTiming();
+    for (int iter = 0; iter < num_iters; ++iter) {
+      C.device(device_) = B.sum();
+    }
+    // Record the number of FLOP executed per second (assuming one operation
+    // per value)
+    finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
+  }
+
   // do a contraction which is equivalent to a matrix multiplication
   void contraction(int num_iters) {
     Eigen::array<TensorIndex, 2> sizeA;
diff --git a/bench/tensors/tensor_benchmarks_gpu.cu b/bench/tensors/tensor_benchmarks_gpu.cu
index a6f594382..76d68c5c1 100644
--- a/bench/tensors/tensor_benchmarks_gpu.cu
+++ b/bench/tensors/tensor_benchmarks_gpu.cu
@@ -33,6 +33,7 @@ BM_FuncGPU(algebraicFunc);
 BM_FuncGPU(transcendentalFunc);
 BM_FuncGPU(rowReduction);
 BM_FuncGPU(colReduction);
+BM_FuncGPU(fullReduction);
 
 
 // Contractions
author	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-02-29 14:57:52 -0800
committer	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-02-29 14:57:52 -0800
commit	56a3ada6701b8e8645df4e00a2ef93d45a4f970a (patch)
tree	2d451fcd6b51461fea1f04543744c8054e8f0f48 /bench
parent	b2075cb7a2d321a11f2c9b96877eaf2d49dc1b25 (diff)