diff options
Diffstat (limited to 'bench')
-rw-r--r-- | bench/BenchTimer.h | 1 | ||||
-rw-r--r-- | bench/tensors/tensor_benchmarks.h | 6 | ||||
-rw-r--r-- | bench/tensors/tensor_benchmarks_fp16_gpu.cu | 8 |
3 files changed, 10 insertions, 5 deletions
diff --git a/bench/BenchTimer.h b/bench/BenchTimer.h index 64666d75f..ea28496b7 100644 --- a/bench/BenchTimer.h +++ b/bench/BenchTimer.h @@ -22,7 +22,6 @@ # endif # include <windows.h> #elif defined(__APPLE__) -#include <CoreServices/CoreServices.h> #include <mach/mach_time.h> #else # include <unistd.h> diff --git a/bench/tensors/tensor_benchmarks.h b/bench/tensors/tensor_benchmarks.h index 90b9bc741..62533a608 100644 --- a/bench/tensors/tensor_benchmarks.h +++ b/bench/tensors/tensor_benchmarks.h @@ -201,9 +201,15 @@ template <typename Device, typename T> class BenchmarkSuite { size_b[1] = k_/2; TensorMap<Tensor<T, 2>, Eigen::Aligned> B(b_, size_b); +#ifndef EIGEN_HAS_INDEX_LIST Eigen::array<TensorIndex, 2> strides; strides[0] = 1; strides[1] = 2; +#else + // Take advantage of cxx11 to give the compiler information it can use to + // optimize the code. + Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> > strides; +#endif StartBenchmarkTiming(); for (int iter = 0; iter < num_iters; ++iter) { diff --git a/bench/tensors/tensor_benchmarks_fp16_gpu.cu b/bench/tensors/tensor_benchmarks_fp16_gpu.cu index d34bd73ca..14876556e 100644 --- a/bench/tensors/tensor_benchmarks_fp16_gpu.cu +++ b/bench/tensors/tensor_benchmarks_fp16_gpu.cu @@ -29,8 +29,8 @@ BM_FuncGPU(padding); BM_FuncGPU(striding); BM_FuncGPU(broadcasting); BM_FuncGPU(coeffWiseOp); -//BM_FuncGPU(algebraicFunc); -//BM_FuncGPU(transcendentalFunc); +BM_FuncGPU(algebraicFunc); +BM_FuncGPU(transcendentalFunc); BM_FuncGPU(rowReduction); BM_FuncGPU(colReduction); @@ -48,11 +48,11 @@ BM_FuncGPU(colReduction); BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3, 10, 5000); -/*BM_FuncWithInputDimsGPU(contraction, N, N, N); +BM_FuncWithInputDimsGPU(contraction, N, N, N); BM_FuncWithInputDimsGPU(contraction, 64, N, N); BM_FuncWithInputDimsGPU(contraction, N, 64, N); BM_FuncWithInputDimsGPU(contraction, N, N, 64); -*/ + // Convolutions #define BM_FuncWithKernelDimsGPU(FUNC, DIM1, DIM2) \ |