diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2018-08-01 16:13:15 +0000 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2018-08-01 16:13:15 +0000 |
commit | 0360c36170622c2802af638ef7b946742baf1eda (patch) | |
tree | 22affab4fa869a1a39c1127cf8d9c262d190f09d /unsupported/Eigen/CXX11/src | |
parent | 9ca1c091311db45e0c27e6e7701e58c85f508347 (diff) | |
parent | c6a5c70712851cd696d7410579506fc299c04a05 (diff) |
Merged in codeplaysoftware/eigen-upstream-pure/separating_internal_memory_allocation (pull request PR-446)
Distinguishing internal memory allocation/deallocation from explicit user memory allocation/deallocation.
Diffstat (limited to 'unsupported/Eigen/CXX11/src')
6 files changed, 36 insertions, 13 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h index 571922073..47b5a5a5e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCustomOp.h @@ -112,7 +112,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi return false; } else { m_result = static_cast<CoeffReturnType*>( - m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); + m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar))); evalTo(m_result); return true; } @@ -120,7 +120,7 @@ struct TensorEvaluator<const TensorCustomUnaryOp<CustomUnaryFunc, XprType>, Devi EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { if (m_result != NULL) { - m_device.deallocate(m_result); + m_device.deallocate_temp(m_result); m_result = NULL; } } @@ -273,7 +273,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, evalTo(data); return false; } else { - m_result = static_cast<Scalar *>(m_device.allocate(dimensions().TotalSize() * sizeof(Scalar))); + m_result = static_cast<Scalar *>(m_device.allocate_temp(dimensions().TotalSize() * sizeof(Scalar))); evalTo(m_result); return true; } @@ -281,7 +281,7 @@ struct TensorEvaluator<const TensorCustomBinaryOp<CustomBinaryFunc, LhsXprType, EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { if (m_result != NULL) { - m_device.deallocate(m_result); + m_device.deallocate_temp(m_result); m_result = NULL; } } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h index 5c1c68912..8cb95f731 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h @@ -21,6 +21,12 @@ struct DefaultDevice { EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void* buffer) const { internal::aligned_free(buffer); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 
void* allocate_temp(size_t num_bytes) const { + return allocate(num_bytes); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const { + deallocate(buffer); + } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { ::memcpy(dst, src, n); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h index 0c036833f..b490433db 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h @@ -207,6 +207,15 @@ struct GpuDevice { stream_->deallocate(buffer); } + EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const { + return stream_->allocate(num_bytes); + } + + EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const { + stream_->deallocate(buffer); + } + + EIGEN_STRONG_INLINE void* scratchpad() const { return stream_->scratchpad(); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index 90fd99027..5a16ebe50 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -105,6 +105,14 @@ struct ThreadPoolDevice { internal::aligned_free(buffer); } + EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const { + return allocate(num_bytes); + } + + EIGEN_STRONG_INLINE void deallocate_temp(void* buffer) const { + deallocate(buffer); + } + EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { ::memcpy(dst, src, n); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index 5a6555cde..a456f308b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -115,7 +115,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device> 
#endif EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { const Index numValues = internal::array_prod(m_impl.dimensions()); - m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType)); + m_buffer = (CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType)); // Should initialize the memory in case we're dealing with non POD types. if (NumTraits<CoeffReturnType>::RequireInitialization) { for (Index i = 0; i < numValues; ++i) { @@ -129,7 +129,7 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device> return true; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_device.deallocate(m_buffer); + m_device.deallocate_temp(m_buffer); m_buffer = NULL; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index bc09d3699..01d3863da 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -513,7 +513,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, !RunningOnGPU))) { bool need_assign = false; if (!data) { - m_result = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType))); + m_result = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType))); data = m_result; need_assign = true; } @@ -525,7 +525,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, const Index num_values_to_reduce = internal::array_prod(m_reducedDims); const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); if (!data) { - data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); + data = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); m_result = data; } Op reducer(m_reducer); @@ -549,7 +549,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, 
MakePointer_>, const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); if (!data) { if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) { - data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); + data = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); m_result = data; } else { @@ -559,7 +559,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Op reducer(m_reducer); if (internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { if (m_result) { - m_device.deallocate(m_result); + m_device.deallocate_temp(m_result); m_result = NULL; } return true; @@ -582,7 +582,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); if (!data) { if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) { - data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); + data = static_cast<CoeffReturnType*>(m_device.allocate_temp(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); m_result = data; } else { @@ -592,7 +592,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Op reducer(m_reducer); if (internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { if (m_result) { - m_device.deallocate(m_result); + m_device.deallocate_temp(m_result); m_result = NULL; } return true; @@ -607,7 +607,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { m_impl.cleanup(); if (m_result) { - m_device.deallocate(m_result); + 
m_device.deallocate_temp(m_result); m_result = NULL; } } |