diff options
author | Rasmus Munk Larsen <rmlarsen@google.com> | 2018-09-11 10:08:10 -0700 |
---|---|---|
committer | Rasmus Munk Larsen <rmlarsen@google.com> | 2018-09-11 10:08:10 -0700 |
commit | 46f88fc454e78484ebdf9d58990d0489c1103cf4 (patch) | |
tree | 3f5702d5b0bd589963a25b6f3f5e49286f467a5f /unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h | |
parent | 43fd42a33b484914ca92931ea63583b672c5e67b (diff) |
Use numerically stable tree reduction in TensorReduction.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h | 10 |
1 file changed, 5 insertions, 5 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h index cd20df505..7504c1598 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h @@ -376,12 +376,12 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> { // so reduce the scope of the optimized version of the code to the simple cases // of doubles, floats and half floats #ifdef EIGEN_HAS_GPU_FP16 - static const bool HasOptimizedImplementation = !Op::IsStateful && + static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful && (internal::is_same<typename Self::CoeffReturnType, float>::value || internal::is_same<typename Self::CoeffReturnType, double>::value || (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess)); #else // EIGEN_HAS_GPU_FP16 - static const bool HasOptimizedImplementation = !Op::IsStateful && + static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful && (internal::is_same<typename Self::CoeffReturnType, float>::value || internal::is_same<typename Self::CoeffReturnType, double>::value); #endif // EIGEN_HAS_GPU_FP16 @@ -697,12 +697,12 @@ struct InnerReducer<Self, Op, GpuDevice> { // so reduce the scope of the optimized version of the code to the simple case // of floats and half floats. 
#ifdef EIGEN_HAS_GPU_FP16 - static const bool HasOptimizedImplementation = !Op::IsStateful && + static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful && (internal::is_same<typename Self::CoeffReturnType, float>::value || internal::is_same<typename Self::CoeffReturnType, double>::value || (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess)); #else // EIGEN_HAS_GPU_FP16 - static const bool HasOptimizedImplementation = !Op::IsStateful && + static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful && (internal::is_same<typename Self::CoeffReturnType, float>::value || internal::is_same<typename Self::CoeffReturnType, double>::value); #endif // EIGEN_HAS_GPU_FP16 @@ -759,7 +759,7 @@ struct OuterReducer<Self, Op, GpuDevice> { // Unfortunately nvidia doesn't support well exotic types such as complex, // so reduce the scope of the optimized version of the code to the simple case // of floats. - static const bool HasOptimizedImplementation = !Op::IsStateful && + static const bool HasOptimizedImplementation = !Self::ReducerTraits::IsStateful && (internal::is_same<typename Self::CoeffReturnType, float>::value || internal::is_same<typename Self::CoeffReturnType, double>::value); template <typename Device, typename OutputType> |