Improved support for vectorization of 16-bit floats

author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-06-09 08:22:27 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-06-09 08:22:27 -0700
commit: aa33446dace833fbf06632e586c80119b3d8ac11 (patch)
tree: d7bc59330e69e19de37da9fffc64016ee0febec0 /unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
parent: 15890c304edbccedc8a989468ed3fc475f428059 (diff)
1 files changed, 4 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index e82530955..1b4fdd03f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -331,7 +331,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
   #ifdef EIGEN_HAS_CUDA_FP16
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
       (internal::is_same<typename Self::CoeffReturnType, float>::value ||
-       (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess));
+       (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
 #else
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
                                                  internal::is_same<typename Self::CoeffReturnType, float>::value;
@@ -346,7 +346,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
       return;
     }
 
-    FullReductionLauncher<Self, Op, OutputType, Op::PacketAccess>::run(self, reducer, device, output, num_coeffs);
+    FullReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs);
   }
 };
 
@@ -608,7 +608,7 @@ struct InnerReducer<Self, Op, GpuDevice> {
 #ifdef EIGEN_HAS_CUDA_FP16
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
       (internal::is_same<typename Self::CoeffReturnType, float>::value ||
-       (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess));
+       (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
 #else
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
                                                  internal::is_same<typename Self::CoeffReturnType, float>::value;
@@ -627,7 +627,7 @@ struct InnerReducer<Self, Op, GpuDevice> {
       return true;
     }
 
-    return InnerReductionLauncher<Self, Op, OutputType, Op::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
+    return InnerReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
   }
 };
author	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-06-09 08:22:27 -0700
committer	Benoit Steiner <benoit.steiner.goog@gmail.com>	2016-06-09 08:22:27 -0700
commit	aa33446dace833fbf06632e586c80119b3d8ac11 (patch)
tree	d7bc59330e69e19de37da9fffc64016ee0febec0 /unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
parent	15890c304edbccedc8a989468ed3fc475f428059 (diff)