diff options
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 6
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | 4
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h | 2
3 files changed, 7 insertions, 5 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index b92753c44..6fc1e4a6e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -186,21 +186,21 @@ struct TensorContractionKernel { /*ConjugateLhs*/ false, /*ConjugateRhs*/ false> GebpKernel; - EIGEN_DONT_INLINE + EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void packLhs(LhsScalar* lhsBlock, const typename LhsMapper::SubMapper& data_mapper, const StorageIndex depth, const StorageIndex rows) { LhsPacker()(lhsBlock, data_mapper, depth, rows, /*stride*/ 0, /*offset*/ 0); } - EIGEN_DONT_INLINE + EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void packRhs(RhsScalar* rhsBlock, const typename RhsMapper::SubMapper& data_mapper, const StorageIndex depth, const StorageIndex cols) { RhsPacker()(rhsBlock, data_mapper, depth, cols); } - EIGEN_DONT_INLINE + EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE static void invoke(const OutputMapper& output_mapper, const LhsScalar* lhsBlock, const RhsScalar* rhsBlock, const StorageIndex rows, const StorageIndex depth, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 949764f3a..2c69e4fd4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -218,6 +218,7 @@ struct InnerMostDimReducer<Self, Op, false, true> { } }; +#if !defined(EIGEN_HIPCC) template <typename Self, typename Op> struct InnerMostDimReducer<Self, Op, true, true> { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType @@ -257,7 +258,8 @@ struct InnerMostDimReducer<Self, Op, true, true> { } } }; - +#endif + template <int DimIndex, typename Self, typename Op, bool vectorizable = (Self::InputPacketAccess && Self::ReducerTraits::PacketAccess)> struct InnerMostDimPreserver { static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void 
reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h index 88940e6e6..375c570b3 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h @@ -292,7 +292,7 @@ __global__ void FullReductionKernelHalfFloat(Reducer reducer, const Self input, } template <typename Op> -__global__ void ReductionCleanupKernelHalfFloat(Op& reducer, half* output, half2* scratch) { +__global__ void ReductionCleanupKernelHalfFloat(Op reducer, half* output, half2* scratch) { eigen_assert(threadIdx.x == 1); half tmp = __low2half(*scratch); reducer.reduce(__high2half(*scratch), &tmp); |