aboutsummaryrefslogtreecommitdiffhomepage
path: root/bench/tensors
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-02-29 14:57:52 -0800
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-02-29 14:57:52 -0800
commit: 56a3ada6701b8e8645df4e00a2ef93d45a4f970a (patch)
tree: 2d451fcd6b51461fea1f04543744c8054e8f0f48 /bench/tensors
parent: b2075cb7a2d321a11f2c9b96877eaf2d49dc1b25 (diff)
Added benchmarks for full reduction
Diffstat (limited to 'bench/tensors')
-rw-r--r-- bench/tensors/tensor_benchmarks.h 26
-rw-r--r-- bench/tensors/tensor_benchmarks_gpu.cu 1
2 files changed, 25 insertions, 2 deletions
diff --git a/bench/tensors/tensor_benchmarks.h b/bench/tensors/tensor_benchmarks.h
index 131d056b4..d916f787e 100644
--- a/bench/tensors/tensor_benchmarks.h
+++ b/bench/tensors/tensor_benchmarks.h
@@ -297,7 +297,8 @@ template <typename Device, typename T> class BenchmarkSuite {
input_size[0] = k_;
input_size[1] = n_;
const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size);
- const Eigen::array<TensorIndex, 1> output_size = {{n_}};
+ Eigen::array<TensorIndex, 1> output_size;
+ output_size[0] = n_;
TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size);
#ifndef EIGEN_HAS_INDEX_LIST
@@ -325,7 +326,8 @@ template <typename Device, typename T> class BenchmarkSuite {
input_size[1] = n_;
const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
b_, input_size);
- const Eigen::array<TensorIndex, 1> output_size = {{k_}};
+ Eigen::array<TensorIndex, 1> output_size;
+ output_size[0] = k_;
TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C(
c_, output_size);
@@ -347,6 +349,26 @@ template <typename Device, typename T> class BenchmarkSuite {
finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
}
+ // Full reduction: sums every element of the k_ x n_ input B into the rank-0 output C.
+ void fullReduction(int num_iters) {
+ Eigen::array<TensorIndex, 2> input_size;
+ input_size[0] = k_;
+ input_size[1] = n_;
+ const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
+ b_, input_size);
+ Eigen::array<TensorIndex, 0> output_size;
+ TensorMap<Tensor<T, 0, 0, TensorIndex>, Eigen::Aligned> C(
+ c_, output_size);
+ // C uses the suite's scalar type T (not a fixed float), like the other reductions.
+ StartBenchmarkTiming();
+ for (int iter = 0; iter < num_iters; ++iter) {
+ C.device(device_) = B.sum();
+ }
+ // Record the number of FLOP executed per second (assuming one operation
+ // per value)
+ finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
+ }
+
// do a contraction which is equivalent to a matrix multiplication
void contraction(int num_iters) {
Eigen::array<TensorIndex, 2> sizeA;
diff --git a/bench/tensors/tensor_benchmarks_gpu.cu b/bench/tensors/tensor_benchmarks_gpu.cu
index a6f594382..76d68c5c1 100644
--- a/bench/tensors/tensor_benchmarks_gpu.cu
+++ b/bench/tensors/tensor_benchmarks_gpu.cu
@@ -33,6 +33,7 @@ BM_FuncGPU(algebraicFunc);
BM_FuncGPU(transcendentalFunc);
BM_FuncGPU(rowReduction);
BM_FuncGPU(colReduction);
+BM_FuncGPU(fullReduction);
// Contractions