diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-02-29 14:57:52 -0800 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-02-29 14:57:52 -0800 |
commit | 56a3ada6701b8e8645df4e00a2ef93d45a4f970a (patch) | |
tree | 2d451fcd6b51461fea1f04543744c8054e8f0f48 /bench/tensors/tensor_benchmarks.h | |
parent | b2075cb7a2d321a11f2c9b96877eaf2d49dc1b25 (diff) |
Added benchmarks for full reduction
Diffstat (limited to 'bench/tensors/tensor_benchmarks.h')
-rw-r--r-- | bench/tensors/tensor_benchmarks.h | 26 |
1 files changed, 24 insertions, 2 deletions
```diff
diff --git a/bench/tensors/tensor_benchmarks.h b/bench/tensors/tensor_benchmarks.h
index 131d056b4..d916f787e 100644
--- a/bench/tensors/tensor_benchmarks.h
+++ b/bench/tensors/tensor_benchmarks.h
@@ -297,7 +297,8 @@ template <typename Device, typename T> class BenchmarkSuite {
     input_size[0] = k_;
     input_size[1] = n_;
     const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size);
-    const Eigen::array<TensorIndex, 1> output_size = {{n_}};
+    Eigen::array<TensorIndex, 1> output_size;
+    output_size[0] = n_;
     TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size);
 
 #ifndef EIGEN_HAS_INDEX_LIST
@@ -325,7 +326,8 @@ template <typename Device, typename T> class BenchmarkSuite {
     input_size[1] = n_;
     const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
         b_, input_size);
-    const Eigen::array<TensorIndex, 1> output_size = {{k_}};
+    Eigen::array<TensorIndex, 1> output_size;
+    output_size[0] = k_;
     TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C(
         c_, output_size);
 
@@ -347,6 +349,26 @@ template <typename Device, typename T> class BenchmarkSuite {
     finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
   }
 
+  // Full reduction
+  void fullReduction(int num_iters) {
+    Eigen::array<TensorIndex, 2> input_size;
+    input_size[0] = k_;
+    input_size[1] = n_;
+    const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
+        b_, input_size);
+    const Eigen::array<TensorIndex, 0> output_size;
+    TensorMap<Tensor<float, 0, 0, TensorIndex>, Eigen::Aligned> C(
+        c_, output_size);
+
+    StartBenchmarkTiming();
+    for (int iter = 0; iter < num_iters; ++iter) {
+      C.device(device_) = B.sum();
+    }
+    // Record the number of FLOP executed per second (assuming one operation
+    // per value)
+    finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters);
+  }
+
   // do a contraction which is equivalent to a matrix multiplication
   void contraction(int num_iters) {
     Eigen::array<TensorIndex, 2> sizeA;
```