about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-06-09 08:22:27 -0700
committer: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-06-09 08:22:27 -0700
commit: aa33446dace833fbf06632e586c80119b3d8ac11 (patch)
tree: d7bc59330e69e19de37da9fffc64016ee0febec0 /unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
parent: 15890c304edbccedc8a989468ed3fc475f428059 (diff)
Improved support for vectorization of 16-bit floats
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 8
1 files changed, 4 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index e82530955..1b4fdd03f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -331,7 +331,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
#ifdef EIGEN_HAS_CUDA_FP16
static const bool HasOptimizedImplementation = !Op::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
- (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess));
+ (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
#else
static const bool HasOptimizedImplementation = !Op::IsStateful &&
internal::is_same<typename Self::CoeffReturnType, float>::value;
@@ -346,7 +346,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
return;
}
- FullReductionLauncher<Self, Op, OutputType, Op::PacketAccess>::run(self, reducer, device, output, num_coeffs);
+ FullReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs);
}
};
@@ -608,7 +608,7 @@ struct InnerReducer<Self, Op, GpuDevice> {
#ifdef EIGEN_HAS_CUDA_FP16
static const bool HasOptimizedImplementation = !Op::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
- (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess));
+ (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
#else
static const bool HasOptimizedImplementation = !Op::IsStateful &&
internal::is_same<typename Self::CoeffReturnType, float>::value;
@@ -627,7 +627,7 @@ struct InnerReducer<Self, Op, GpuDevice> {
return true;
}
- return InnerReductionLauncher<Self, Op, OutputType, Op::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
+ return InnerReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
}
};