aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/util/Memory.h
diff options
context:
space:
mode:
authorGravatar Deven Desai <deven.desai.amd@gmail.com>2018-07-11 09:17:33 -0400
committerGravatar Deven Desai <deven.desai.amd@gmail.com>2018-07-11 09:17:33 -0400
commit38807a257500cd0746b819c994efab805b8a02e4 (patch)
tree0be837e16ad1dc2b09d8f2be2f752f074b169717 /Eigen/src/Core/util/Memory.h
parente2b2c61533cb923ddba41ba7bd64b87f30a25e29 (diff)
parentf00d08cc0a987fa624209b920608b56638404f13 (diff)
merging updates from upstream
Diffstat (limited to 'Eigen/src/Core/util/Memory.h')
-rw-r--r--Eigen/src/Core/util/Memory.h84
1 files changed, 80 insertions, 4 deletions
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index 059d06874..f2cac01ac 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -580,7 +580,7 @@ template<typename T> struct smart_memmove_helper<T,false> {
// you can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
// to the appropriate stack allocation function
-#ifndef EIGEN_ALLOCA
+#if ! defined EIGEN_ALLOCA && ! defined EIGEN_CUDA_ARCH
#if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
#define EIGEN_ALLOCA alloca
#elif EIGEN_COMP_MSVC
@@ -599,12 +599,14 @@ template<typename T> class aligned_stack_memory_handler : noncopyable
* In this case, the buffer elements will also be destructed when this handler will be destructed.
* Finally, if \a dealloc is true, then the pointer \a ptr is freed.
**/
+ EIGEN_DEVICE_FUNC
aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
: m_ptr(ptr), m_size(size), m_deallocate(dealloc)
{
if(NumTraits<T>::RequireInitialization && m_ptr)
Eigen::internal::construct_elements_of_array(m_ptr, size);
}
+ EIGEN_DEVICE_FUNC
~aligned_stack_memory_handler()
{
if(NumTraits<T>::RequireInitialization && m_ptr)
@@ -618,6 +620,60 @@ template<typename T> class aligned_stack_memory_handler : noncopyable
bool m_deallocate;
};
+#ifdef EIGEN_ALLOCA
+
+template<typename Xpr, int NbEvaluations,
+ bool MapExternalBuffer = nested_eval<Xpr,NbEvaluations>::Evaluate && Xpr::MaxSizeAtCompileTime==Dynamic
+ >
+struct local_nested_eval_wrapper
+{
+ static const bool NeedExternalBuffer = false;
+ typedef typename Xpr::Scalar Scalar;
+ typedef typename nested_eval<Xpr,NbEvaluations>::type ObjectType;
+ ObjectType object;
+
+ EIGEN_DEVICE_FUNC
+ local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr)
+ {
+ EIGEN_UNUSED_VARIABLE(ptr);
+ eigen_internal_assert(ptr==0);
+ }
+};
+
+template<typename Xpr, int NbEvaluations>
+struct local_nested_eval_wrapper<Xpr,NbEvaluations,true>
+{
+ static const bool NeedExternalBuffer = true;
+ typedef typename Xpr::Scalar Scalar;
+ typedef typename plain_object_eval<Xpr>::type PlainObject;
+ typedef Map<PlainObject,EIGEN_DEFAULT_ALIGN_BYTES> ObjectType;
+ ObjectType object;
+
+ EIGEN_DEVICE_FUNC
+ local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr)
+ : object(ptr==0 ? reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar)*xpr.size())) : ptr, xpr.rows(), xpr.cols()),
+ m_deallocate(ptr==0)
+ {
+ if(NumTraits<Scalar>::RequireInitialization && object.data())
+ Eigen::internal::construct_elements_of_array(object.data(), object.size());
+ object = xpr;
+ }
+
+ EIGEN_DEVICE_FUNC
+ ~local_nested_eval_wrapper()
+ {
+ if(NumTraits<Scalar>::RequireInitialization && object.data())
+ Eigen::internal::destruct_elements_of_array(object.data(), object.size());
+ if(m_deallocate)
+ Eigen::internal::aligned_free(object.data());
+ }
+
+private:
+ bool m_deallocate;
+};
+
+#endif // EIGEN_ALLOCA
+
template<typename T> class scoped_array : noncopyable
{
T* m_ptr;
@@ -645,9 +701,11 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
} // end namespace internal
/** \internal
- * Declares, allocates and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack
- * if SIZE is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
- * (currently, this is Linux and Visual Studio only). Otherwise the memory is allocated on the heap.
+ *
+ * The macro ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) declares, allocates,
+ * and construct an aligned buffer named NAME of SIZE elements of type TYPE on the stack
+ * if the size in bytes is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
+ * (currently, this is Linux, OSX and Visual Studio only). Otherwise the memory is allocated on the heap.
* The allocated buffer is automatically deleted when exiting the scope of this declaration.
* If BUFFER is non null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs.
* Here is an example:
@@ -658,6 +716,14 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
* }
* \endcode
* The underlying stack allocation function can controlled with the EIGEN_ALLOCA preprocessor token.
+ *
+ * The macro ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) is analogue to
+ * \code
+ * typename internal::nested_eval<XPRT_T,N>::type NAME(XPR);
+ * \endcode
+ * with the advantage of using aligned stack allocation even if the maximal size of XPR at compile time is unknown.
+ * This is accomplished through alloca if this later is supported and if the required number of bytes
+ * is below EIGEN_STACK_ALLOCATION_LIMIT.
*/
#ifdef EIGEN_ALLOCA
@@ -677,6 +743,13 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
: Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \
Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)
+
+ #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) \
+ Eigen::internal::local_nested_eval_wrapper<XPR_T,N> EIGEN_CAT(NAME,_wrapper)(XPR, reinterpret_cast<typename XPR_T::Scalar*>( \
+ ( (Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::NeedExternalBuffer) && ((sizeof(typename XPR_T::Scalar)*XPR.size())<=EIGEN_STACK_ALLOCATION_LIMIT) ) \
+ ? EIGEN_ALIGNED_ALLOCA( sizeof(typename XPR_T::Scalar)*XPR.size() ) : 0 ) ) ; \
+ typename Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::ObjectType NAME(EIGEN_CAT(NAME,_wrapper).object)
+
#else
#define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
@@ -684,6 +757,9 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)
+
+#define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) typename Eigen::internal::nested_eval<XPR_T,N>::type NAME(XPR);
+
#endif