about | summary | refs | log | tree | commit | diff | homepage
path: root/bench
diff options
context:
space:
mode:
Diffstat (limited to 'bench')
-rw-r--r-- bench/BenchTimer.h | 1
-rw-r--r-- bench/tensors/tensor_benchmarks.h | 6
-rw-r--r-- bench/tensors/tensor_benchmarks_fp16_gpu.cu | 8
3 files changed, 10 insertions, 5 deletions
diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h
index 64666d75f..ea28496b7 100644
--- a/bench/BenchTimer.h
+++ b/bench/BenchTimer.h
@@ -22,7 +22,6 @@
# endif
# include <windows.h>
#elif defined(__APPLE__)
-#include <CoreServices/CoreServices.h>
#include <mach/mach_time.h>
#else
# include <unistd.h>
diff --git a/bench/tensors/tensor_benchmarks.h b/bench/tensors/tensor_benchmarks.h
index 90b9bc741..62533a608 100644
--- a/bench/tensors/tensor_benchmarks.h
+++ b/bench/tensors/tensor_benchmarks.h
@@ -201,9 +201,15 @@ template <typename Device, typename T> class BenchmarkSuite {
size_b[1] = k_/2;
TensorMap<Tensor<T, 2>, Eigen::Aligned> B(b_, size_b);
+#ifndef EIGEN_HAS_INDEX_LIST
Eigen::array<TensorIndex, 2> strides;
strides[0] = 1;
strides[1] = 2;
+#else
+ // Take advantage of cxx11 to give the compiler information it can use to
+ // optimize the code.
+ Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> > strides;
+#endif
StartBenchmarkTiming();
for (int iter = 0; iter < num_iters; ++iter) {
diff --git a/bench/tensors/tensor_benchmarks_fp16_gpu.cu b/bench/tensors/tensor_benchmarks_fp16_gpu.cu
index d34bd73ca..14876556e 100644
--- a/bench/tensors/tensor_benchmarks_fp16_gpu.cu
+++ b/bench/tensors/tensor_benchmarks_fp16_gpu.cu
@@ -29,8 +29,8 @@ BM_FuncGPU(padding);
BM_FuncGPU(striding);
BM_FuncGPU(broadcasting);
BM_FuncGPU(coeffWiseOp);
-//BM_FuncGPU(algebraicFunc);
-//BM_FuncGPU(transcendentalFunc);
+BM_FuncGPU(algebraicFunc);
+BM_FuncGPU(transcendentalFunc);
BM_FuncGPU(rowReduction);
BM_FuncGPU(colReduction);
@@ -48,11 +48,11 @@ BM_FuncGPU(colReduction);
BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3, 10, 5000);
-/*BM_FuncWithInputDimsGPU(contraction, N, N, N);
+BM_FuncWithInputDimsGPU(contraction, N, N, N);
BM_FuncWithInputDimsGPU(contraction, 64, N, N);
BM_FuncWithInputDimsGPU(contraction, N, 64, N);
BM_FuncWithInputDimsGPU(contraction, N, N, 64);
-*/
+
// Convolutions
#define BM_FuncWithKernelDimsGPU(FUNC, DIM1, DIM2) \