author     Rasmus Munk Larsen <rmlarsen@google.com>    2017-01-25 09:22:26 -0800
committer  Rasmus Munk Larsen <rmlarsen@google.com>    2017-01-25 09:22:26 -0800
commit     7d39c6d50a594f37a2f1e5cb9140917993b5e776 (patch)
tree       ba5a8239f95840b5111559e47ad3e9324ba98df7
parent     5c9ed4ba0d1063e0ea1d78b24b4cd7480e68d22c (diff)
parent     850ca961d28df99a0ba44bd8bf034ac08e39686e (diff)
Merged eigen/eigen into default
-rw-r--r--  Eigen/src/Core/CoreEvaluators.h                               |  4
-rw-r--r--  Eigen/src/Core/functors/NullaryFunctors.h                     |  4
-rw-r--r--  Eigen/src/Core/util/Memory.h                                  | 63
-rw-r--r--  Eigen/src/Core/util/XprHelper.h                               |  2
-rw-r--r--  Eigen/src/SparseCore/SparseCompressedBase.h                   | 38
-rw-r--r--  test/nullary.cpp                                              | 42
-rw-r--r--  test/sparse_basic.cpp                                         |  4
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h        |  2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h      |  2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h   |  2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h                |  4
11 files changed, 130 insertions(+), 37 deletions(-)
diff --git a/Eigen/src/Core/CoreEvaluators.h b/Eigen/src/Core/CoreEvaluators.h
index 24fc7835b..412f5a661 100644
--- a/Eigen/src/Core/CoreEvaluators.h
+++ b/Eigen/src/Core/CoreEvaluators.h
@@ -1613,9 +1613,7 @@ struct evaluator<Diagonal<ArgType, DiagIndex> >
{ }
typedef typename XprType::Scalar Scalar;
- // FIXME having to check whether ArgType is sparse here i not very nice.
- typedef typename internal::conditional<!internal::is_same<typename ArgType::StorageKind,Sparse>::value,
- typename XprType::CoeffReturnType,Scalar>::type CoeffReturnType;
+ typedef typename XprType::CoeffReturnType CoeffReturnType;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
CoeffReturnType coeff(Index row, Index) const
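Reviewer note: with the sparse special case removed, Diagonal's evaluator forwards XprType::CoeffReturnType (a const reference) unconditionally; this is only safe because the sparse evaluator in SparseCompressedBase.h below gains a persistent m_zero to back absent entries. A minimal sketch of the behavior this relies on (illustrative only, not part of the patch):

#include <Eigen/SparseCore>
#include <iostream>

int main() {
  // 3x3 sparse matrix with one stored diagonal entry.
  Eigen::SparseMatrix<double> m(3, 3);
  m.insert(1, 1) = 5.0;
  m.makeCompressed();

  // Reading the diagonal of a sparse expression now returns const
  // references: stored entries come from valuePtr(), absent ones from
  // the evaluator's persistent zero.
  Eigen::VectorXd d = m.diagonal();
  std::cout << d.transpose() << "\n";  // prints: 0 5 0
}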
diff --git a/Eigen/src/Core/functors/NullaryFunctors.h b/Eigen/src/Core/functors/NullaryFunctors.h
index 0311d9035..6a30466fb 100644
--- a/Eigen/src/Core/functors/NullaryFunctors.h
+++ b/Eigen/src/Core/functors/NullaryFunctors.h
@@ -93,8 +93,8 @@ struct linspaced_op_impl<Scalar,Packet,/*IsInteger*/true>
linspaced_op_impl(const Scalar& low, const Scalar& high, Index num_steps) :
m_low(low),
m_multiplier((high-low)/convert_index<Scalar>(num_steps<=1 ? 1 : num_steps-1)),
- m_divisor(convert_index<Scalar>(num_steps+high-low)/(high-low+1)),
- m_use_divisor((high+1)<(low+num_steps))
+ m_divisor(convert_index<Scalar>((high>=low?num_steps:-num_steps)+(high-low))/((numext::abs(high-low)+1)==0?1:(numext::abs(high-low)+1))),
+ m_use_divisor(num_steps>1 && (numext::abs(high-low)+1)<num_steps)
{}
template<typename IndexType>
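Reviewer note: the rewritten divisor term flips sign when high < low and guards |high-low|+1 against being zero, so the integer LinSpaced path now handles descending and degenerate ranges. A small demonstration of the symmetry this buys (illustrative, not from the patch):

#include <Eigen/Core>
#include <iostream>

int main() {
  // Ascending and descending integer ranges should mirror each other;
  // the descending case exercises the new negative-divisor handling.
  Eigen::VectorXi up   = Eigen::VectorXi::LinSpaced(5, 0, 8);
  Eigen::VectorXi down = Eigen::VectorXi::LinSpaced(5, 8, 0);
  std::cout << up.transpose()   << "\n";  // 0 2 4 6 8
  std::cout << down.transpose() << "\n";  // 8 6 4 2 0
}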
diff --git a/Eigen/src/Core/util/Memory.h b/Eigen/src/Core/util/Memory.h
index c634d7ea0..572b1fe69 100644
--- a/Eigen/src/Core/util/Memory.h
+++ b/Eigen/src/Core/util/Memory.h
@@ -63,7 +63,7 @@ namespace Eigen {
namespace internal {
-EIGEN_DEVICE_FUNC
+EIGEN_DEVICE_FUNC
inline void throw_std_bad_alloc()
{
#ifdef EIGEN_EXCEPTIONS
@@ -74,6 +74,41 @@ inline void throw_std_bad_alloc()
#endif
}
+EIGEN_DEVICE_FUNC
+inline void fast_memcpy(void* dst, const void* src, size_t size) {
+#if defined(__CUDA__) || defined(__ANDROID__)
+ ::memcpy(dst, src, size);
+#else
+ switch(size) {
+ // Most compilers will generate inline code for fixed sizes,
+ // which is significantly faster for small copies.
+ case 1: memcpy(dst, src, 1); break;
+ case 2: memcpy(dst, src, 2); break;
+ case 3: memcpy(dst, src, 3); break;
+ case 4: memcpy(dst, src, 4); break;
+ case 5: memcpy(dst, src, 5); break;
+ case 6: memcpy(dst, src, 6); break;
+ case 7: memcpy(dst, src, 7); break;
+ case 8: memcpy(dst, src, 8); break;
+ case 9: memcpy(dst, src, 9); break;
+ case 10: memcpy(dst, src, 10); break;
+ case 11: memcpy(dst, src, 11); break;
+ case 12: memcpy(dst, src, 12); break;
+ case 13: memcpy(dst, src, 13); break;
+ case 14: memcpy(dst, src, 14); break;
+ case 15: memcpy(dst, src, 15); break;
+ case 16: memcpy(dst, src, 16); break;
+#ifdef EIGEN_OS_LINUX
+ // On Linux, memmove appears to be faster than memcpy for
+ // large sizes, strangely enough.
+ default: memmove(dst, src, size); break;
+#else
+ default: memcpy(dst, src, size); break;
+#endif
+ }
+#endif
+}
+
/*****************************************************************************
*** Implementation of handmade aligned functions ***
*****************************************************************************/
@@ -114,7 +149,7 @@ inline void* handmade_aligned_realloc(void* ptr, std::size_t size, std::size_t =
void *previous_aligned = static_cast<char *>(original)+previous_offset;
if(aligned!=previous_aligned)
std::memmove(aligned, previous_aligned, size);
-
+
*(reinterpret_cast<void**>(aligned) - 1) = original;
return aligned;
}
@@ -142,7 +177,7 @@ EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{
eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
}
-#else
+#else
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{}
#endif
@@ -471,8 +506,8 @@ EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index
}
/** \internal Returns the smallest integer multiple of \a base and greater or equal to \a size
- */
-template<typename Index>
+ */
+template<typename Index>
inline Index first_multiple(Index size, Index base)
{
return ((size+base-1)/base)*base;
@@ -493,7 +528,7 @@ template<typename T> struct smart_copy_helper<T,true> {
IntPtr size = IntPtr(end)-IntPtr(start);
if(size==0) return;
eigen_internal_assert(start!=0 && end!=0 && target!=0);
- memcpy(target, start, size);
+ fast_memcpy(target, start, size);
}
};
@@ -502,7 +537,7 @@ template<typename T> struct smart_copy_helper<T,false> {
{ std::copy(start, end, target); }
};
-// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
+// intelligent memmove. falls back to std::memmove for POD types, uses std::copy otherwise.
template<typename T, bool UseMemmove> struct smart_memmove_helper;
template<typename T> void smart_memmove(const T* start, const T* end, T* target)
@@ -522,15 +557,15 @@ template<typename T> struct smart_memmove_helper<T,true> {
template<typename T> struct smart_memmove_helper<T,false> {
static inline void run(const T* start, const T* end, T* target)
- {
+ {
if (UIntPtr(target) < UIntPtr(start))
{
std::copy(start, end, target);
}
- else
+ else
{
std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
- std::copy_backward(start, end, target + count);
+ std::copy_backward(start, end, target + count);
}
}
};
@@ -603,7 +638,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
{
std::swap(a.ptr(),b.ptr());
}
-
+
} // end namespace internal
/** \internal
@@ -622,7 +657,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
* The underlying stack allocation function can be controlled with the EIGEN_ALLOCA preprocessor token.
*/
#ifdef EIGEN_ALLOCA
-
+
#if EIGEN_DEFAULT_ALIGN_BYTES>0
// We always manually re-align the result of EIGEN_ALLOCA.
// If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
@@ -645,7 +680,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)
-
+
#endif
@@ -701,7 +736,7 @@ template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
* Example:
* \code
* // Matrix4f requires 16 bytes alignment:
-* std::map< int, Matrix4f, std::less<int>,
+* std::map< int, Matrix4f, std::less<int>,
* aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
* // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
* std::map< int, Vector3f > my_map_vec3;
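Reviewer note: smart_copy's trivially-copyable path now routes through the new fast_memcpy, so small fixed-size copies compile down to the inlined switch cases above while large ones keep the memcpy/memmove fallback. A minimal usage sketch (smart_copy and fast_memcpy are internal API; shown for illustration only):

#include <Eigen/Core>

int main() {
  int src[4] = {1, 2, 3, 4};
  int dst[4] = {0, 0, 0, 0};
  // For POD types smart_copy_helper<T,true>::run() now forwards to
  // internal::fast_memcpy, hitting the inlined 16-byte case here.
  Eigen::internal::smart_copy(src, src + 4, dst);
  return dst[3] == 4 ? 0 : 1;
}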
diff --git a/Eigen/src/Core/util/XprHelper.h b/Eigen/src/Core/util/XprHelper.h
index b94dd61ff..d5e565cb8 100644
--- a/Eigen/src/Core/util/XprHelper.h
+++ b/Eigen/src/Core/util/XprHelper.h
@@ -638,7 +638,7 @@ struct plain_constant_type
template<typename ExpressionType>
struct is_lvalue
{
- enum { value = !bool(is_const<ExpressionType>::value) &&
+ enum { value = (!bool(is_const<ExpressionType>::value)) &&
bool(traits<ExpressionType>::Flags & LvalueBit) };
};
diff --git a/Eigen/src/SparseCore/SparseCompressedBase.h b/Eigen/src/SparseCore/SparseCompressedBase.h
index d32a8df40..e0b3c22b6 100644
--- a/Eigen/src/SparseCore/SparseCompressedBase.h
+++ b/Eigen/src/SparseCore/SparseCompressedBase.h
@@ -295,11 +295,11 @@ struct evaluator<SparseCompressedBase<Derived> >
Flags = Derived::Flags
};
- evaluator() : m_matrix(0)
+ evaluator() : m_matrix(0), m_zero(0)
{
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
}
- explicit evaluator(const Derived &mat) : m_matrix(&mat)
+ explicit evaluator(const Derived &mat) : m_matrix(&mat), m_zero(0)
{
EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
}
@@ -312,26 +312,42 @@ struct evaluator<SparseCompressedBase<Derived> >
operator const Derived&() const { return *m_matrix; }
typedef typename DenseCoeffsBase<Derived,ReadOnlyAccessors>::CoeffReturnType CoeffReturnType;
- Scalar coeff(Index row, Index col) const
- { return m_matrix->coeff(row,col); }
-
+ const Scalar& coeff(Index row, Index col) const
+ {
+ Index p = find(row,col);
+
+ if(p==Dynamic)
+ return m_zero;
+ else
+ return m_matrix->const_cast_derived().valuePtr()[p];
+ }
+
Scalar& coeffRef(Index row, Index col)
{
+ Index p = find(row,col);
+ eigen_assert(p!=Dynamic && "written coefficient does not exist");
+ return m_matrix->const_cast_derived().valuePtr()[p];
+ }
+
+protected:
+
+ Index find(Index row, Index col) const
+ {
eigen_internal_assert(row>=0 && row<m_matrix->rows() && col>=0 && col<m_matrix->cols());
-
+
const Index outer = Derived::IsRowMajor ? row : col;
const Index inner = Derived::IsRowMajor ? col : row;
Index start = m_matrix->outerIndexPtr()[outer];
Index end = m_matrix->isCompressed() ? m_matrix->outerIndexPtr()[outer+1] : m_matrix->outerIndexPtr()[outer] + m_matrix->innerNonZeroPtr()[outer];
- eigen_assert(end>start && "you are using a non finalized sparse matrix or written coefficient does not exist");
- const Index p = std::lower_bound(m_matrix->innerIndexPtr()+start, m_matrix->innerIndexPtr()+end,inner)
- - m_matrix->innerIndexPtr();
- eigen_assert((p<end) && (m_matrix->innerIndexPtr()[p]==inner) && "written coefficient does not exist");
- return m_matrix->const_cast_derived().valuePtr()[p];
+ eigen_assert(end>=start && "you are using a non finalized sparse matrix or written coefficient does not exist");
+ const Index p = std::lower_bound(m_matrix->innerIndexPtr()+start, m_matrix->innerIndexPtr()+end,inner) - m_matrix->innerIndexPtr();
+
+ return ((p<end) && (m_matrix->innerIndexPtr()[p]==inner)) ? p : Dynamic;
}
const Derived *m_matrix;
+ const Scalar m_zero;
};
}
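Reviewer note: the evaluator's lookup is now factored into find() plus coeff()/coeffRef(): coeff() returns the persistent m_zero when find() reports Dynamic, while coeffRef() still asserts, so read-only dense-style access to a sparse expression no longer trips "written coefficient does not exist". A short sketch of the user-visible effect (illustrative only):

#include <Eigen/SparseCore>
#include <Eigen/Dense>
#include <iostream>

int main() {
  Eigen::SparseMatrix<double> m(3, 3);
  m.insert(0, 0) = 3.0;
  m.insert(2, 2) = 7.0;
  m.makeCompressed();

  // Going through .array() forces coefficient-wise reads via the generic
  // sparse evaluator; the absent middle diagonal entry reads as m_zero.
  Eigen::VectorXd d;
  d = m.diagonal().array();
  std::cout << d.transpose() << "\n";  // prints: 3 0 7
}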
diff --git a/test/nullary.cpp b/test/nullary.cpp
index 351d26e74..acd55506e 100644
--- a/test/nullary.cpp
+++ b/test/nullary.cpp
@@ -152,6 +152,45 @@ void testVectorType(const VectorType& base)
m.tail(size-1).setLinSpaced(low, high);
VERIFY_IS_APPROX(m(size-1), high);
}
+
+ // regression test for bug 1383 (LinSpaced with empty size/range)
+ {
+ Index n0 = VectorType::SizeAtCompileTime==Dynamic ? 0 : VectorType::SizeAtCompileTime;
+ low = internal::random<Scalar>();
+ m = VectorType::LinSpaced(n0,low,low-1);
+ VERIFY(m.size()==n0);
+
+ if(VectorType::SizeAtCompileTime==Dynamic)
+ {
+ VERIFY_IS_EQUAL(VectorType::LinSpaced(n0,0,Scalar(n0-1)).sum(),Scalar(0));
+ VERIFY_IS_EQUAL(VectorType::LinSpaced(n0,low,low-1).sum(),Scalar(0));
+ }
+
+ m.setLinSpaced(n0,0,Scalar(n0-1));
+ VERIFY(m.size()==n0);
+ m.setLinSpaced(n0,low,low-1);
+ VERIFY(m.size()==n0);
+
+ // empty range only:
+ VERIFY_IS_APPROX(VectorType::LinSpaced(size,low,low),VectorType::Constant(size,low));
+ m.setLinSpaced(size,low,low);
+ VERIFY_IS_APPROX(m,VectorType::Constant(size,low));
+
+ if(NumTraits<Scalar>::IsInteger)
+ {
+ VERIFY_IS_APPROX( VectorType::LinSpaced(size,low,Scalar(low+size-1)), VectorType::LinSpaced(size,Scalar(low+size-1),low).reverse() );
+
+ if(VectorType::SizeAtCompileTime==Dynamic)
+ {
+ // Check negative multiplicator path:
+ for(Index k=1; k<5; ++k)
+ VERIFY_IS_APPROX( VectorType::LinSpaced(size,low,Scalar(low+(size-1)*k)), VectorType::LinSpaced(size,Scalar(low+(size-1)*k),low).reverse() );
+ // Check negative divisor path:
+ for(Index k=1; k<5; ++k)
+ VERIFY_IS_APPROX( VectorType::LinSpaced(size*k,low,Scalar(low+size-1)), VectorType::LinSpaced(size*k,Scalar(low+size-1),low).reverse() );
+ }
+ }
+ }
}
template<typename MatrixType>
@@ -198,7 +237,8 @@ void test_nullary()
CALL_SUBTEST_8( testVectorType(Matrix<float,8,1>()) );
CALL_SUBTEST_8( testVectorType(Matrix<float,1,1>()) );
- CALL_SUBTEST_9( testVectorType(VectorXi(internal::random<int>(1,300))) );
+ CALL_SUBTEST_9( testVectorType(VectorXi(internal::random<int>(1,10))) );
+ CALL_SUBTEST_9( testVectorType(VectorXi(internal::random<int>(9,300))) );
CALL_SUBTEST_9( testVectorType(Matrix<int,1,1>()) );
}
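Reviewer note: these tests pin down bug 1383, where zero-sized or empty-range LinSpaced calls could misbehave. The covered cases, in miniature (illustrative only, not from the patch):

#include <Eigen/Core>
#include <cassert>

int main() {
  // Zero-sized LinSpaced must yield an empty vector rather than crash.
  Eigen::VectorXd e = Eigen::VectorXd::LinSpaced(0, 0.0, 1.0);
  assert(e.size() == 0);

  // Empty range (low == high): every coefficient equals the bound.
  Eigen::VectorXd c = Eigen::VectorXd::LinSpaced(4, 2.5, 2.5);
  assert((c.array() == 2.5).all());
  return 0;
}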
diff --git a/test/sparse_basic.cpp b/test/sparse_basic.cpp
index 208a66f12..91b7cb335 100644
--- a/test/sparse_basic.cpp
+++ b/test/sparse_basic.cpp
@@ -485,6 +485,10 @@ template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& re
SparseMatrixType m2(rows, cols);
initSparse<Scalar>(density, refMat2, m2);
VERIFY_IS_APPROX(m2.diagonal(), refMat2.diagonal().eval());
+ DenseVector d = m2.diagonal();
+ VERIFY_IS_APPROX(d, refMat2.diagonal().eval());
+ d = m2.diagonal().array();
+ VERIFY_IS_APPROX(d, refMat2.diagonal().eval());
VERIFY_IS_APPROX(const_cast<const SparseMatrixType&>(m2).diagonal(), refMat2.diagonal().eval());
initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index 442c14fac..39012b937 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -56,7 +56,7 @@ void pack_simple(Scalar * dst, const Scalar * src, Index cols, Index rows, Index
} else {
// Naive memcpy calls
for (Index col = 0; col < cols; ++col) {
- memcpy(dst + col*lddst, src + col*ldsrc, rows*sizeof(Scalar));
+ internal::fast_memcpy(dst + col*lddst, src + col*ldsrc, rows*sizeof(Scalar));
}
}
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
index ccaaa6cb2..b133781ae 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceDefault.h
@@ -22,7 +22,7 @@ struct DefaultDevice {
internal::aligned_free(buffer);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
- ::memcpy(dst, src, n);
+ internal::fast_memcpy(dst, src, n);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
memcpy(dst, src, n);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index 16180ca69..facdea735 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -106,7 +106,7 @@ struct ThreadPoolDevice {
}
EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
- ::memcpy(dst, src, n);
+ internal::fast_memcpy(dst, src, n);
}
EIGEN_STRONG_INLINE void memcpyHostToDevice(void* dst, const void* src, size_t n) const {
memcpy(dst, src, n);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
index 08eb5595a..f060191ab 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
@@ -253,7 +253,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
// get data into line_buf
const Index stride = m_strides[dim];
if (stride == 1) {
- memcpy(line_buf, &buf[base_offset], line_len*sizeof(ComplexScalar));
+ m_device.memcpy(line_buf, &buf[base_offset], line_len*sizeof(ComplexScalar));
} else {
Index offset = base_offset;
for (int j = 0; j < line_len; ++j, offset += stride) {
@@ -271,7 +271,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
// write back
if (FFTDir == FFT_FORWARD && stride == 1) {
- memcpy(&buf[base_offset], line_buf, line_len*sizeof(ComplexScalar));
+ m_device.memcpy(&buf[base_offset], line_buf, line_len*sizeof(ComplexScalar));
} else {
Index offset = base_offset;
const ComplexScalar div_factor = ComplexScalar(1.0 / line_len, 0);