diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-05-05 14:15:11 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-05-05 14:15:11 -0700 |
commit | f81e4131802d8f437ef52956aa760f56f9e39dd7 (patch) | |
tree | 2d25c7e6c055de6cd2c53ed0b737a15d60bec466 /bench | |
parent | 28d557265803e3b0891309f5e06644bafdacddd6 (diff) |
Added a benchmark to measure the performance of full reductions of 16 bit floats
Diffstat (limited to 'bench')
-rw-r--r-- | bench/tensors/tensor_benchmarks.h | 2 | ||||
-rw-r--r-- | bench/tensors/tensor_benchmarks_fp16_gpu.cu | 1 |
2 files changed, 2 insertions, 1 deletion
diff --git a/bench/tensors/tensor_benchmarks.h b/bench/tensors/tensor_benchmarks.h
index 62533a608..e0631b401 100644
--- a/bench/tensors/tensor_benchmarks.h
+++ b/bench/tensors/tensor_benchmarks.h
@@ -368,7 +368,7 @@ template <typename Device, typename T> class BenchmarkSuite {
     const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
         b_, input_size);
     Eigen::array<TensorIndex, 0> output_size;
-    TensorMap<Tensor<float, 0, 0, TensorIndex>, Eigen::Aligned> C(
+    TensorMap<Tensor<T, 0, 0, TensorIndex>, Eigen::Aligned> C(
         c_, output_size);

     StartBenchmarkTiming();
diff --git a/bench/tensors/tensor_benchmarks_fp16_gpu.cu b/bench/tensors/tensor_benchmarks_fp16_gpu.cu
index 14876556e..65784d0d6 100644
--- a/bench/tensors/tensor_benchmarks_fp16_gpu.cu
+++ b/bench/tensors/tensor_benchmarks_fp16_gpu.cu
@@ -33,6 +33,7 @@ BM_FuncGPU(algebraicFunc);
 BM_FuncGPU(transcendentalFunc);
 BM_FuncGPU(rowReduction);
 BM_FuncGPU(colReduction);
+BM_FuncGPU(fullReduction);


 // Contractions