This commit contains the following (HIP specific) updates:

- unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h Changing "pass-by-reference" argument to be "pass-by-value" instead (in a __global__ function decl). "pass-by-reference" arguments to __global__ functions are unwise, and will be explicitly flagged as errors by the newer versions of HIP. - Eigen/src/Core/util/Memory.h - unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h Changes introduced in recent commits break the HIP compile. Adding the EIGEN_DEVICE_FUNC attribute to some functions and calling ::malloc/free instead of the corresponding std:: versions to get the HIP compile working again. - unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h A change introduced in a recent commit breaks the HIP compile (the link stage errors out due to failure to inline a function). Disabling the recently introduced code (only for the HIP compile), to get the Eigen nightly testing going again. Will submit another PR once we have the proper fix. - Eigen/src/Core/util/ConfigureVectorization.h Enabling GPU VECTOR support when the HIP compiler is in use (for both the host and device compile phases).
author: Deven Desai <deven.desai.amd@gmail.com> 2018-10-01 14:28:37 +0000
committer: Deven Desai <deven.desai.amd@gmail.com> 2018-10-01 14:28:37 +0000
commit: 94898488a6fe3096a7a44d0bb108e514f0e44699 (patch)
tree: 7aced8073d5a62c5f0c6696f77adcd6994732c82 /unsupported/Eigen
parent: e95696acb313a84b33a18cc300de418b05dc58e5 (diff)
3 files changed, 7 insertions, 5 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index b92753c44..6fc1e4a6e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -186,21 +186,21 @@ struct TensorContractionKernel {
       /*ConjugateLhs*/ false, /*ConjugateRhs*/ false>
       GebpKernel;
 
-  EIGEN_DONT_INLINE
+  EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE
   static void packLhs(LhsScalar* lhsBlock,
                       const typename LhsMapper::SubMapper& data_mapper,
                       const StorageIndex depth, const StorageIndex rows) {
     LhsPacker()(lhsBlock, data_mapper, depth, rows, /*stride*/ 0, /*offset*/ 0);
   }
 
-  EIGEN_DONT_INLINE
+  EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE
   static void packRhs(RhsScalar* rhsBlock,
                       const typename RhsMapper::SubMapper& data_mapper,
                       const StorageIndex depth, const StorageIndex cols) {
     RhsPacker()(rhsBlock, data_mapper, depth, cols);
   }
 
-  EIGEN_DONT_INLINE
+  EIGEN_DEVICE_FUNC EIGEN_DONT_INLINE
   static void invoke(const OutputMapper& output_mapper,
                      const LhsScalar* lhsBlock, const RhsScalar* rhsBlock,
                      const StorageIndex rows, const StorageIndex depth,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 949764f3a..2c69e4fd4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -218,6 +218,7 @@ struct InnerMostDimReducer<Self, Op, false, true> {
   }
 };
 
+#if !defined(EIGEN_HIPCC) 
 template <typename Self, typename Op>
 struct InnerMostDimReducer<Self, Op, true, true> {
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename Self::CoeffReturnType
@@ -257,7 +258,8 @@ struct InnerMostDimReducer<Self, Op, true, true> {
     }
   }
 };
-
+#endif
+ 
 template <int DimIndex, typename Self, typename Op, bool vectorizable = (Self::InputPacketAccess && Self::ReducerTraits::PacketAccess)>
 struct InnerMostDimPreserver {
   static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const Self&, typename Self::Index, Op&, typename Self::PacketReturnType*) {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
index 88940e6e6..375c570b3 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h
@@ -292,7 +292,7 @@ __global__ void FullReductionKernelHalfFloat(Reducer reducer, const Self input,
 }
 
 template <typename Op>
-__global__ void ReductionCleanupKernelHalfFloat(Op& reducer, half* output, half2* scratch) {
+__global__ void ReductionCleanupKernelHalfFloat(Op reducer, half* output, half2* scratch) {
   eigen_assert(threadIdx.x == 1);
   half tmp = __low2half(*scratch);
   reducer.reduce(__high2half(*scratch), &tmp);
author	Deven Desai <deven.desai.amd@gmail.com>	2018-10-01 14:28:37 +0000
committer	Deven Desai <deven.desai.amd@gmail.com>	2018-10-01 14:28:37 +0000
commit	94898488a6fe3096a7a44d0bb108e514f0e44699 (patch)
tree	7aced8073d5a62c5f0c6696f77adcd6994732c82 /unsupported/Eigen
parent	e95696acb313a84b33a18cc300de418b05dc58e5 (diff)