aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/util/Memory.h
diff options
context:
space:
mode:
authorGravatar Deven Desai <deven.desai.amd@gmail.com>2018-10-01 14:28:37 +0000
committerGravatar Deven Desai <deven.desai.amd@gmail.com>2018-10-01 14:28:37 +0000
commit94898488a6fe3096a7a44d0bb108e514f0e44699 (patch)
tree7aced8073d5a62c5f0c6696f77adcd6994732c82 /Eigen/src/Core/util/Memory.h
parente95696acb313a84b33a18cc300de418b05dc58e5 (diff)
This commit contains the following (HIP specific) updates:
- unsupported/Eigen/CXX11/src/Tensor/TensorReductionGpu.h Changing "pass-by-reference" argument to be "pass-by-value" instead (in a __global__ function decl). "pass-by-reference" arguments to __global__ functions are unwise, and will be explicitly flagged as errors by the newer versions of HIP. - Eigen/src/Core/util/Memory.h - unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h Changes introduced in recent commits breaks the HIP compile. Adding EIGEN_DEVICE_FUNC attribute to some functions and calling ::malloc/free instead of the corresponding std:: versions to get the HIP compile working again - unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h Change introduced a recent commit breaks the HIP compile (link stage errors out due to failure to inline a function). Disabling the recently introduced code (only for HIP compile), to get the eigen nightly testing going again. Will submit another PR once we have te proper fix. - Eigen/src/Core/util/ConfigureVectorization.h Enabling GPU VECTOR support when HIP compiler is in use (for both the host and device compile phases)
Diffstat (limited to 'Eigen/src/Core/util/Memory.h')
-rw-r--r--Eigen/src/Core/util/Memory.h18
1 files changed, 15 insertions, 3 deletions
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 9dd2e0252..c624556c5 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -96,10 +96,16 @@ inline void throw_std_bad_alloc()
/** \internal Like malloc, but the returned pointer is guaranteed to be 16-byte aligned.
* Fast, but wastes 16 additional bytes of memory. Does not throw any exception.
*/
-inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
+EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
{
eigen_assert(alignment >= sizeof(void*) && (alignment & -alignment) == alignment && "Alignment must be at least sizeof(void*) and a power of 2");
+
+#if defined(EIGEN_HIP_DEVICE_COMPILE)
+ void *original = ::malloc(size+alignment);
+#else
void *original = std::malloc(size+alignment);
+#endif
+
if (original == 0) return 0;
void *aligned = reinterpret_cast<void*>((reinterpret_cast<std::size_t>(original) & ~(std::size_t(alignment-1))) + alignment);
*(reinterpret_cast<void**>(aligned) - 1) = original;
@@ -107,9 +113,15 @@ inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = E
}
/** \internal Frees memory allocated with handmade_aligned_malloc */
-inline void handmade_aligned_free(void *ptr)
+EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr)
{
- if (ptr) std::free(*(reinterpret_cast<void**>(ptr) - 1));
+ if (ptr) {
+#if defined(EIGEN_HIP_DEVICE_COMPILE)
+ ::free(*(reinterpret_cast<void**>(ptr) - 1));
+#else
+ std::free(*(reinterpret_cast<void**>(ptr) - 1));
+#endif
+ }
}
/** \internal