aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11
diff options
context:
space:
mode:
authorGravatar Christoph Hertzberg <chtz@informatik.uni-bremen.de>2018-10-01 16:51:04 +0000
committerGravatar Christoph Hertzberg <chtz@informatik.uni-bremen.de>2018-10-01 16:51:04 +0000
commit564ca71e392f580144a7ded4ac9e3700c848495e (patch)
tree7d5cafc2305816145c1f55f89effe0397f7ebc34 /unsupported/Eigen/CXX11
parent2088c0897f6ea7175d06de98fe04c71cd453a34d (diff)
parent94898488a6fe3096a7a44d0bb108e514f0e44699 (diff)
Merged in deven-amd/eigen/HIP_fixes (pull request PR-518)
PR with HIP specific fixes (for the eigen nightly regression failures in HIP mode)
Diffstat (limited to 'unsupported/Eigen/CXX11')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h6
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h4
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h2
3 files changed, 7 insertions, 5 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index 8f66e587a..d1c659858 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -186,21 +186,21 @@ struct TensorContractionKernel {
/*ConjugateLhs*/ false, /*ConjugateRhs*/ false>
GebpKernel;
- EIGEN_DONT_INLINE
+ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE
static void packLhs(LhsScalar* lhsBlock,
const typename LhsMapper::SubMapper& data_mapper,
const StorageIndex depth, const StorageIndex rows) {
LhsPacker()(lhsBlock, data_mapper, depth, rows, /*stride*/ 0, /*offset*/ 0);
}
- EIGEN_DONT_INLINE
+ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE
static void packRhs(RhsScalar* rhsBlock,
const typename RhsMapper::SubMapper& data_mapper,
const StorageIndex depth, const StorageIndex cols) {
RhsPacker()(rhsBlock, data_mapper, depth, cols);
}
- EIGEN_DONT_INLINE
+ EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE
static void invoke(const OutputMapper& output_mapper,
const LhsScalar* lhsBlock, const RhsScalar* rhsBlock,
const StorageIndex rows, const StorageIndex depth,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 949764f3a..2c69e4fd4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -218,6 +218,7 @@ struct InnerMostDimReducer<Self, Op, false, true> {
}
};
+#if !defined(EIGEN_HIPCC)
template <typename Self, typename Op>
struct InnerMostDimReducer<Self, Op, true, true> {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType
@@ -257,7 +258,8 @@ struct InnerMostDimReducer<Self, Op, true, true> {
}
}
};
-
+#endif
+
template <int DimIndex, typename Self, typename Op, bool vectorizable = (Self::InputPacketAccess && Self::ReducerTraits::PacketAccess)>
struct InnerMostDimPreserver {
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
index 88940e6e6..375c570b3 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
@@ -292,7 +292,7 @@ __global__ void FullReductionKernelHalfFloat(Reducer reducer, const Self input,
}
template <typename Op>
-__global__ void ReductionCleanupKernelHalfFloat(Op& reducer, half* output, half2* scratch) {
+__global__ void ReductionCleanupKernelHalfFloat(Op reducer, half* output, half2* scratch) {
eigen_assert(threadIdx.x == 1);
half tmp = __low2half(*scratch);
reducer.reduce(__high2half(*scratch), &tmp);