diff options
Diffstat (limited to 'tensorflow/core/kernels/gather_nd_op_cpu_impl.h')
-rw-r--r-- | tensorflow/core/kernels/gather_nd_op_cpu_impl.h | 6 |
1 files changed, 3 insertions, 3 deletions
diff --git a/tensorflow/core/kernels/gather_nd_op_cpu_impl.h b/tensorflow/core/kernels/gather_nd_op_cpu_impl.h
index 277ee2be02..1c78de253e 100644
--- a/tensorflow/core/kernels/gather_nd_op_cpu_impl.h
+++ b/tensorflow/core/kernels/gather_nd_op_cpu_impl.h
@@ -114,7 +114,7 @@ struct GatherNdSlice<CPUDevice, T, Index, IXDIM> {
     generator::GatherNdSliceGenerator<T, Index, IXDIM> gather_nd_generator(
         slice_size, Tindices, Tparams, Tout, &error_loc);
 
-#ifdef INTEL_MKL
+#if defined(INTEL_MKL) && defined(ENABLE_MKL)
 // Eigen implementation below is not highly performant. gather_nd_generator
 // does not seem to be called in parallel, leading to very poor performance.
 // Additionally, since it uses scalar (Tscratch) to invoke 'generate', it
@@ -126,12 +126,12 @@ struct GatherNdSlice<CPUDevice, T, Index, IXDIM> {
       const Eigen::array<Eigen::DenseIndex, 1> loc{i};
       gather_nd_generator(loc);
     }
-#else // INTEL_MKL
+#else // INTEL_MKL && ENABLE_MKL
     Tscratch.device(d) = Tscratch.reshape(reshape_dims)
                              .broadcast(broadcast_dims)
                              .generate(gather_nd_generator)
                              .sum();
-#endif
+#endif // INTEL_MKL && ENABLE_MKL
 
     // error_loc() returns -1 if there's no out-of-bounds index,
     // otherwise it returns the location of an OOB index in Tindices.