about | summary | refs | log | tree | commit | diff | homepage
path: root/bench/tensors
diff options
context:
space:
mode:
author Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-02-26 12:24:58 -0800
committer Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-02-26 12:24:58 -0800
commit 93485d86bcddc0665939ce2c43261dfaa1b8cc90 (patch)
tree bda02d881a97fbbd62fd3c260d134ec9245c0d7d /bench/tensors
parent 002824e32def5c9a430acac4bd9fc05308c923bb (diff)
Added benchmarks for type casting of float16
Diffstat (limited to 'bench/tensors')
-rw-r--r-- bench/tensors/tensor_benchmarks.h | 6
-rw-r--r-- bench/tensors/tensor_benchmarks_fp16_gpu.cu | 2
2 files changed, 4 insertions, 4 deletions
diff --git a/bench/tensors/tensor_benchmarks.h b/bench/tensors/tensor_benchmarks.h
index b208a401a..131d056b4 100644
--- a/bench/tensors/tensor_benchmarks.h
+++ b/bench/tensors/tensor_benchmarks.h
@@ -48,12 +48,12 @@ template <typename Device, typename T> class BenchmarkSuite {
Eigen::array<TensorIndex, 2> sizes;
sizes[0] = m_;
sizes[1] = k_;
- const TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> A(a_, sizes);
- TensorMap<Tensor<int, 2, 0, TensorIndex>, Eigen::Aligned> B((int*)b_, sizes);
+ const TensorMap<Tensor<int, 2, 0, TensorIndex>, Eigen::Aligned> A((int*)a_, sizes);
+ TensorMap<Tensor<T, 2, 0, TensorIndex>, Eigen::Aligned> B(b_, sizes);
StartBenchmarkTiming();
for (int iter = 0; iter < num_iters; ++iter) {
- B.device(device_) = A.template cast<int>();
+ B.device(device_) = A.template cast<T>();
}
// Record the number of values copied per second
finalizeBenchmark(static_cast<int64_t>(m_) * k_ * num_iters);
diff --git a/bench/tensors/tensor_benchmarks_fp16_gpu.cu b/bench/tensors/tensor_benchmarks_fp16_gpu.cu
index d841bcdac..49f75472a 100644
--- a/bench/tensors/tensor_benchmarks_fp16_gpu.cu
+++ b/bench/tensors/tensor_benchmarks_fp16_gpu.cu
@@ -19,7 +19,7 @@
BENCHMARK_RANGE(BM_##FUNC, 10, 5000);
BM_FuncGPU(memcpy);
-//BM_FuncGPU(typeCasting);
+BM_FuncGPU(typeCasting);
//BM_FuncGPU(random);
BM_FuncGPU(slicing);
BM_FuncGPU(rowChip);