diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-01-28 16:20:36 -0800 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-01-28 16:20:36 -0800 |
commit | c8d5f21941a41556f94e937ea5a91badb7fb9353 (patch) | |
tree | 163d3b29f6ce464803dfa5831aa3e31f91c8c7c8 /bench/tensors/tensor_benchmarks.h | |
parent | 7b3044d086f413fdaf65acd30fc3bc469d43ccc6 (diff) |
Added extra tensor benchmarks
Diffstat (limited to 'bench/tensors/tensor_benchmarks.h')
-rw-r--r-- | bench/tensors/tensor_benchmarks.h | 87 |
1 files changed, 80 insertions, 7 deletions
```diff
diff --git a/bench/tensors/tensor_benchmarks.h b/bench/tensors/tensor_benchmarks.h
index 071326aa7..6b9d13446 100644
--- a/bench/tensors/tensor_benchmarks.h
+++ b/bench/tensors/tensor_benchmarks.h
@@ -45,6 +45,20 @@ template <typename Device> class BenchmarkSuite {
     finalizeBenchmark(m_ * m_ * num_iters);
   }
 
+  void typeCasting(int num_iters) {
+    eigen_assert(m_ == n_);
+    const Eigen::array<TensorIndex, 2> sizes = {{m_, k_}};
+    const TensorMap<Tensor<float, 2, 0, TensorIndex>, Eigen::Aligned> A(a_, sizes);
+    TensorMap<Tensor<int, 2, 0, TensorIndex>, Eigen::Aligned> B((int*)b_, sizes);
+
+    StartBenchmarkTiming();
+    for (int iter = 0; iter < num_iters; ++iter) {
+      B.device(device_) = A.cast<int>();
+    }
+    // Record the number of values copied per second
+    finalizeBenchmark(m_ * k_ * num_iters);
+  }
+
   void random(int num_iters) {
     eigen_assert(m_ == k_ && k_ == n_);
     const Eigen::array<TensorIndex, 2> sizes = {{m_, m_}};
@@ -87,6 +101,34 @@ template <typename Device> class BenchmarkSuite {
     finalizeBenchmark(m_ * m_ * num_iters);
   }
 
+  void rowChip(int num_iters) {
+    const Eigen::array<TensorIndex, 2> input_size = {{k_, n_}};
+    const TensorMap<Tensor<float, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size);
+    const Eigen::array<TensorIndex, 1> output_size = {{n_}};
+    TensorMap<Tensor<float, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size);
+
+    StartBenchmarkTiming();
+    for (int iter = 0; iter < num_iters; ++iter) {
+      C.device(device_) = B.chip(iter % k_, 0);
+    }
+    // Record the number of values copied from the rhs chip to the lhs.
+    finalizeBenchmark(n_ * num_iters);
+  }
+
+  void colChip(int num_iters) {
+    const Eigen::array<TensorIndex, 2> input_size= {{k_, n_}};
+    const TensorMap<Tensor<float, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size);
+    const Eigen::array<TensorIndex, 1> output_size = {{n_}};
+    TensorMap<Tensor<float, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size);
+
+    StartBenchmarkTiming();
+    for (int iter = 0; iter < num_iters; ++iter) {
+      C.device(device_) = B.chip(iter % n_, 1);
+    }
+    // Record the number of values copied from the rhs chip to the lhs.
+    finalizeBenchmark(n_ * num_iters);
+  }
+
   void shuffling(int num_iters) {
     eigen_assert(m_ == n_);
     const Eigen::array<TensorIndex, 2> size_a = {{m_, k_}};
@@ -147,7 +189,6 @@ template <typename Device> class BenchmarkSuite {
     TensorMap<Tensor<float, 2>, Eigen::Aligned> C(c_, size_c);
 
 #ifndef EIGEN_HAS_INDEX_LIST
-    // nvcc doesn't support cxx11
     const Eigen::array<int, 2> broadcast = {{1, n_}};
 #else
     // Take advantage of cxx11 to give the compiler information it can use to
@@ -212,14 +253,20 @@ template <typename Device> class BenchmarkSuite {
     finalizeBenchmark(m_ * m_ * num_iters);
   }
 
-  // Simple reduction
-  void reduction(int num_iters) {
+  // Row reduction
+  void rowReduction(int num_iters) {
     const Eigen::array<TensorIndex, 2> input_size = {{k_, n_}};
-    const TensorMap<Tensor<float, 2>, Eigen::Aligned> B(b_, input_size);
+    const TensorMap<Tensor<float, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, input_size);
     const Eigen::array<TensorIndex, 1> output_size = {{n_}};
-    TensorMap<Tensor<float, 1>, Eigen::Aligned> C(c_, output_size);
+    TensorMap<Tensor<float, 1, 0, TensorIndex>, Eigen::Aligned> C(c_, output_size);
 
-    const Eigen::array<TensorIndex, 1> sum_along_dim = {{0}};
+#ifndef EIGEN_HAS_INDEX_LIST
+    const Eigen::array<TensorIndex, 1> sum_along_dim(0);
+#else
+    // Take advantage of cxx11 to give the compiler information it can use to
+    // optimize the code.
+    Eigen::IndexList<Eigen::type2index<0>> sum_along_dim;
+#endif
 
     StartBenchmarkTiming();
     for (int iter = 0; iter < num_iters; ++iter) {
@@ -227,7 +274,33 @@ template <typename Device> class BenchmarkSuite {
     }
     // Record the number of FLOP executed per second (assuming one operation
     // per value)
-    finalizeBenchmark(m_ * m_ * num_iters);
+    finalizeBenchmark(k_ * n_ * num_iters);
+  }
+
+  // Column reduction
+  void colReduction(int num_iters) {
+    const Eigen::array<TensorIndex, 2> input_size = {{k_, n_}};
+    const TensorMap<Tensor<float, 2, 0, TensorIndex>, Eigen::Aligned> B(
+        b_, input_size);
+    const Eigen::array<TensorIndex, 1> output_size = {{k_}};
+    TensorMap<Tensor<float, 1, 0, TensorIndex>, Eigen::Aligned> C(
+        c_, output_size);
+
+#ifndef EIGEN_HAS_INDEX_LIST
+    const Eigen::array<TensorIndex, 1> sum_along_dim = {{1}};
+#else
+    // Take advantage of cxx11 to give the compiler information it can use to
+    // optimize the code.
+    Eigen::IndexList<Eigen::type2index<1>> sum_along_dim;
+#endif
+
+    StartBenchmarkTiming();
+    for (int iter = 0; iter < num_iters; ++iter) {
+      C.device(device_) = B.sum(sum_along_dim);
+    }
+    // Record the number of FLOP executed per second (assuming one operation
+    // per value)
+    finalizeBenchmark(k_ * n_ * num_iters);
   }
 
   // do a contraction which is equivalent to a matrix multiplication
```