diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-02-29 14:57:52 -0800 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-02-29 14:57:52 -0800 |
commit | 56a3ada6701b8e8645df4e00a2ef93d45a4f970a (patch) | |
tree | 2d451fcd6b51461fea1f04543744c8054e8f0f48 /bench | |
parent | b2075cb7a2d321a11f2c9b96877eaf2d49dc1b25 (diff) | |
Added benchmarks for full reduction
Diffstat (limited to 'bench')
-rw-r--r-- | bench/tensors/tensor_benchmarks.h | 26 | ||||
-rw-r--r-- | bench/tensors/tensor_benchmarks_gpu.cu | 1 |
2 files changed, 25 insertions, 2 deletions
diff --git a/bench/tensors/tensor_benchmarks.h b/bench/tensors/tensor_benchmarks.h index 131d056b4..d916f787e 100644 --- a/bench/tensors/tensor_benchmarks.h +++ b/bench/tensors/tensor_benchmarks.h @@ -297,7 +297,8 @@ template <typename Device, typename T> class BenchmarkSuite { input_size[0] = k_; input_size[1] = n_; const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size); - const Eigen::array<TensorIndex, 1> output_size = {{n_}}; + Eigen::array<TensorIndex, 1> output_size; + output_size[0] = n_; TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size); #ifndef EIGEN_HAS_INDEX_LIST @@ -325,7 +326,8 @@ template <typename Device, typename T> class BenchmarkSuite { input_size[1] = n_; const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B( b_, input_size); - const Eigen::array<TensorIndex, 1> output_size = {{k_}}; + Eigen::array<TensorIndex, 1> output_size; + output_size[0] = k_; TensorMap<Tensor<T, 1, 0, TensorIndex>, Eigen::Aligned> C( c_, output_size); @@ -347,6 +349,26 @@ template <typename Device, typename T> class BenchmarkSuite { finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters); } + // Full reduction + void fullReduction(int num_iters) { + Eigen::array<TensorIndex, 2> input_size; + input_size[0] = k_; + input_size[1] = n_; + const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B( + b_, input_size); + const Eigen::array<TensorIndex, 0> output_size; + TensorMap<Tensor<float, 0, 0, TensorIndex>, Eigen::Aligned> C( + c_, output_size); + + StartBenchmarkTiming(); + for (int iter = 0; iter < num_iters; ++iter) { + C.device(device_) = B.sum(); + } + // Record the number of FLOP executed per second (assuming one operation + // per value) + finalizeBenchmark(static_cast<int64_t>(k_) * n_ * num_iters); + } + // do a contraction which is equivalent to a matrix multiplication void contraction(int num_iters) { Eigen::array<TensorIndex, 2> sizeA; diff --git a/bench/tensors/tensor_benchmarks_gpu.cu 
b/bench/tensors/tensor_benchmarks_gpu.cu index a6f594382..76d68c5c1 100644 --- a/bench/tensors/tensor_benchmarks_gpu.cu +++ b/bench/tensors/tensor_benchmarks_gpu.cu @@ -33,6 +33,7 @@ BM_FuncGPU(algebraicFunc); BM_FuncGPU(transcendentalFunc); BM_FuncGPU(rowReduction); BM_FuncGPU(colReduction); +BM_FuncGPU(fullReduction); // Contractions |