about summary refs log tree commit diff homepage
path: root/bench
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-05-05 14:15:11 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-05-05 14:15:11 -0700
commit f81e4131802d8f437ef52956aa760f56f9e39dd7 (patch)
tree 2d25c7e6c055de6cd2c53ed0b737a15d60bec466 /bench
parent 28d557265803e3b0891309f5e06644bafdacddd6 (diff)
Added a benchmark to measure the performance of full reductions of 16-bit floats
Diffstat (limited to 'bench')
-rw-r--r-- bench/tensors/tensor_benchmarks.h 2
-rw-r--r-- bench/tensors/tensor_benchmarks_fp16_gpu.cu 1
2 files changed, 2 insertions, 1 deletions
diff --git a/bench/tensors/tensor_benchmarks.h b/bench/tensors/tensor_benchmarks.h
index 62533a608..e0631b401 100644
--- a/bench/tensors/tensor_benchmarks.h
+++ b/bench/tensors/tensor_benchmarks.h
@@ -368,7 +368,7 @@ template <typename Device, typename T> class BenchmarkSuite {
const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(
b_, input_size);
Eigen::array<TensorIndex, 0> output_size;
- TensorMap<Tensor<float, 0, 0, TensorIndex>, Eigen::Aligned> C(
+ TensorMap<Tensor<T, 0, 0, TensorIndex>, Eigen::Aligned> C(
c_, output_size);
StartBenchmarkTiming();
diff --git a/bench/tensors/tensor_benchmarks_fp16_gpu.cu b/bench/tensors/tensor_benchmarks_fp16_gpu.cu
index 14876556e..65784d0d6 100644
--- a/bench/tensors/tensor_benchmarks_fp16_gpu.cu
+++ b/bench/tensors/tensor_benchmarks_fp16_gpu.cu
@@ -33,6 +33,7 @@ BM_FuncGPU(algebraicFunc);
BM_FuncGPU(transcendentalFunc);
BM_FuncGPU(rowReduction);
BM_FuncGPU(colReduction);
+BM_FuncGPU(fullReduction);
// Contractions