diff options
author | Luke Iwanski <luke@codeplay.com> | 2016-09-19 14:03:54 +0100 |
---|---|---|
committer | Luke Iwanski <luke@codeplay.com> | 2016-09-19 14:03:54 +0100 |
commit | b91e0211727b9ea5d7c30908ed86afc4e50d4c6c (patch) | |
tree | 4dd857d1aabdd4065e8b7dec1369a4bd06bac5e7 /unsupported | |
parent | cb81975714a96ecb2faf33ca242feeee3543b1db (diff) | |
parent | ff47717f25aeede4878f65b214cdce264b8314e8 (diff) |
Merged with default.
Diffstat (limited to 'unsupported')
57 files changed, 1575 insertions, 339 deletions
diff --git a/unsupported/Eigen/CMakeLists.txt b/unsupported/Eigen/CMakeLists.txt index 7478b6b0d..631a06014 100644 --- a/unsupported/Eigen/CMakeLists.txt +++ b/unsupported/Eigen/CMakeLists.txt @@ -4,6 +4,7 @@ set(Eigen_HEADERS ArpackSupport AutoDiff BVH + EulerAngles FFT IterativeSolvers KroneckerProduct @@ -26,5 +27,6 @@ install(FILES DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen COMPONENT Devel ) -add_subdirectory(src) -add_subdirectory(CXX11)
\ No newline at end of file +install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen COMPONENT Devel FILES_MATCHING PATTERN "*.h") + +add_subdirectory(CXX11) diff --git a/unsupported/Eigen/CXX11/CMakeLists.txt b/unsupported/Eigen/CXX11/CMakeLists.txt index a40bc4715..385ed240c 100644 --- a/unsupported/Eigen/CXX11/CMakeLists.txt +++ b/unsupported/Eigen/CXX11/CMakeLists.txt @@ -5,4 +5,4 @@ install(FILES DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11 COMPONENT Devel ) -add_subdirectory(src) +install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11 COMPONENT Devel FILES_MATCHING PATTERN "*.h") diff --git a/unsupported/Eigen/CXX11/src/CMakeLists.txt b/unsupported/Eigen/CXX11/src/CMakeLists.txt deleted file mode 100644 index 1734262bb..000000000 --- a/unsupported/Eigen/CXX11/src/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -add_subdirectory(util) -add_subdirectory(ThreadPool) -add_subdirectory(Tensor) -add_subdirectory(TensorSymmetry) diff --git a/unsupported/Eigen/CXX11/src/Tensor/CMakeLists.txt b/unsupported/Eigen/CXX11/src/Tensor/CMakeLists.txt deleted file mode 100644 index 6d4b3ea0d..000000000 --- a/unsupported/Eigen/CXX11/src/Tensor/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_CXX11_Tensor_SRCS "*.h") - -INSTALL(FILES - ${Eigen_CXX11_Tensor_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/Tensor COMPONENT Devel - ) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index e3880d2e0..3c8710255 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -192,6 +192,12 @@ class TensorBase<Derived, ReadOnlyAccessors> } EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_log1p_op<Scalar>, const Derived> + log1p() const { + return unaryExpr(internal::scalar_log1p_op<Scalar>()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const 
TensorCwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived> abs() const { return unaryExpr(internal::scalar_abs_op<Scalar>()); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index 56d9c2025..20b29e5fd 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -25,8 +25,9 @@ template<typename Dimensions, typename LhsXprType, typename RhsXprType> struct traits<TensorContractionOp<Dimensions, LhsXprType, RhsXprType> > { // Type promotion to handle the case where the types of the lhs and the rhs are different. - typedef typename internal::promote_storage_type<typename LhsXprType::Scalar, - typename RhsXprType::Scalar>::ret Scalar; + typedef typename gebp_traits<typename remove_const<typename LhsXprType::Scalar>::type, + typename remove_const<typename RhsXprType::Scalar>::type>::ResScalar Scalar; + typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind, typename traits<RhsXprType>::StorageKind>::ret StorageKind; typedef typename promote_index_type<typename traits<LhsXprType>::Index, @@ -75,8 +76,8 @@ class TensorContractionOp : public TensorBase<TensorContractionOp<Indices, LhsXp { public: typedef typename Eigen::internal::traits<TensorContractionOp>::Scalar Scalar; - typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType, - typename RhsXprType::CoeffReturnType>::ret CoeffReturnType; + typedef typename internal::gebp_traits<typename LhsXprType::CoeffReturnType, + typename RhsXprType::CoeffReturnType>::ResScalar CoeffReturnType; typedef typename Eigen::internal::nested<TensorContractionOp>::type Nested; typedef typename Eigen::internal::traits<TensorContractionOp>::StorageKind StorageKind; typedef typename Eigen::internal::traits<TensorContractionOp>::Index Index; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h 
b/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h index a76c8ca35..d66e45d50 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h @@ -91,21 +91,21 @@ class TensorOpCost { } // TODO(rmlarsen): Define min in terms of total cost, not elementwise. - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& cwiseMin( - const TensorOpCost& rhs) { - bytes_loaded_ = numext::mini(bytes_loaded_, rhs.bytes_loaded()); - bytes_stored_ = numext::mini(bytes_stored_, rhs.bytes_stored()); - compute_cycles_ = numext::mini(compute_cycles_, rhs.compute_cycles()); - return *this; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMin( + const TensorOpCost& rhs) const { + double bytes_loaded = numext::mini(bytes_loaded_, rhs.bytes_loaded()); + double bytes_stored = numext::mini(bytes_stored_, rhs.bytes_stored()); + double compute_cycles = numext::mini(compute_cycles_, rhs.compute_cycles()); + return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles); } // TODO(rmlarsen): Define max in terms of total cost, not elementwise. 
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& cwiseMax( - const TensorOpCost& rhs) { - bytes_loaded_ = numext::maxi(bytes_loaded_, rhs.bytes_loaded()); - bytes_stored_ = numext::maxi(bytes_stored_, rhs.bytes_stored()); - compute_cycles_ = numext::maxi(compute_cycles_, rhs.compute_cycles()); - return *this; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMax( + const TensorOpCost& rhs) const { + double bytes_loaded = numext::maxi(bytes_loaded_, rhs.bytes_loaded()); + double bytes_stored = numext::maxi(bytes_stored_, rhs.bytes_stored()); + double compute_cycles = numext::maxi(compute_cycles_, rhs.compute_cycles()); + return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& operator+=( diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index b2b4bcf62..834ce07df 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -239,7 +239,7 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device> EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device) - : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device) + : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper() { } typedef typename XprType::Index Index; @@ -256,13 +256,13 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device> EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const { - return m_functor(index); + return m_wrapper(m_functor, index); } template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - return m_functor.template packetOp<Index, PacketReturnType>(index); + return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost @@ -282,6 +282,7 @@ struct 
TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device> private: const NullaryOp m_functor; TensorEvaluator<ArgType, Device> m_argImpl; + const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper; }; @@ -612,7 +613,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType> .cwiseMax(m_elseImpl.costPerCoeff(vectorized)); } - EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { return NULL; } /// required by sycl in order to extract the accessor const TensorEvaluator<IfArgType, Device> & cond_impl() const { return m_condImpl; } /// required by sycl in order to extract the accessor diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index a8e48fced..fc75dbb5c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -25,7 +25,7 @@ struct scalar_mod_op { }; template <typename Scalar> struct functor_traits<scalar_mod_op<Scalar> > -{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; }; +{ enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; }; /** \internal @@ -38,7 +38,7 @@ struct scalar_mod2_op { }; template <typename Scalar> struct functor_traits<scalar_mod2_op<Scalar> > -{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; }; +{ enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; }; template <typename Scalar> struct scalar_fmod_op { @@ -188,6 +188,32 @@ struct reducer_traits<MeanReducer<T>, Device> { }; +template <typename T, bool IsMax = true, bool IsInteger = true> +struct MinMaxBottomValue { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static T bottom_value() { + return Eigen::NumTraits<T>::lowest(); + } +}; +template <typename T> +struct MinMaxBottomValue<T, true, false> { + 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static T bottom_value() { + return -Eigen::NumTraits<T>::infinity(); + } +}; +template <typename T> +struct MinMaxBottomValue<T, false, true> { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static T bottom_value() { + return Eigen::NumTraits<T>::highest(); + } +}; +template <typename T> +struct MinMaxBottomValue<T, false, false> { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static T bottom_value() { + return Eigen::NumTraits<T>::infinity(); + } +}; + + template <typename T> struct MaxReducer { static const bool PacketAccess = packet_traits<T>::HasMax; @@ -200,9 +226,8 @@ template <typename T> struct MaxReducer EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { (*accum) = pmax<Packet>(*accum, p); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return Eigen::NumTraits<T>::lowest(); + return MinMaxBottomValue<T, true, Eigen::NumTraits<T>::IsInteger>::bottom_value(); } template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { @@ -242,9 +267,8 @@ template <typename T> struct MinReducer EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { (*accum) = pmin<Packet>(*accum, p); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return Eigen::NumTraits<T>::highest(); + return MinMaxBottomValue<T, false, Eigen::NumTraits<T>::IsInteger>::bottom_value(); } template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { @@ -454,12 +478,11 @@ template <typename T> class UniformRandomGenerator { m_deterministic = other.m_deterministic; } - template<typename Index> - T operator()(Index) const { + T operator()() const { return random<T>(); } - template<typename Index, typename PacketType> - PacketType packetOp(Index) const { + template<typename PacketType> + PacketType packetOp() const { const int packetSize = internal::unpacket_traits<PacketType>::size; 
EIGEN_ALIGN_MAX T values[packetSize]; for (int i = 0; i < packetSize; ++i) { @@ -484,23 +507,22 @@ template <> class UniformRandomGenerator<float> { } UniformRandomGenerator(const UniformRandomGenerator<float>& other) { m_generator = new std::mt19937(); - m_generator->seed(other(0) * UINT_MAX); + m_generator->seed(other() * UINT_MAX); m_deterministic = other.m_deterministic; } ~UniformRandomGenerator() { delete m_generator; } - template<typename Index> - float operator()(Index) const { + float operator()() const { return m_distribution(*m_generator); } - template<typename Index, typename PacketType> - PacketType packetOp(Index i) const { + template<typename PacketType> + PacketType packetOp() const { const int packetSize = internal::unpacket_traits<PacketType>::size; EIGEN_ALIGN_MAX float values[packetSize]; for (int k = 0; k < packetSize; ++k) { - values[k] = this->operator()(i); + values[k] = this->operator()(); } return internal::pload<PacketType>(values); } @@ -525,23 +547,22 @@ template <> class UniformRandomGenerator<double> { } UniformRandomGenerator(const UniformRandomGenerator<double>& other) { m_generator = new std::mt19937(); - m_generator->seed(other(0) * UINT_MAX); + m_generator->seed(other() * UINT_MAX); m_deterministic = other.m_deterministic; } ~UniformRandomGenerator() { delete m_generator; } - template<typename Index> - double operator()(Index) const { + double operator()() const { return m_distribution(*m_generator); } - template<typename Index, typename PacketType> - PacketType packetOp(Index i) const { + template<typename PacketType> + PacketType packetOp() const { const int packetSize = internal::unpacket_traits<PacketType>::size; EIGEN_ALIGN_MAX double values[packetSize]; for (int k = 0; k < packetSize; ++k) { - values[k] = this->operator()(i); + values[k] = this->operator()(); } return internal::pload<PacketType>(values); } @@ -578,12 +599,11 @@ template <> class UniformRandomGenerator<float> { curand_init(seed, tid, 0, &m_state); } - 
template<typename Index> - __device__ float operator()(Index) const { + __device__ float operator()() const { return curand_uniform(&m_state); } - template<typename Index, typename PacketType> - __device__ float4 packetOp(Index) const { + template<typename PacketType> + __device__ float4 packetOp() const { EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); return curand_uniform4(&m_state); } @@ -608,12 +628,11 @@ template <> class UniformRandomGenerator<double> { const int seed = m_deterministic ? 0 : get_random_seed(); curand_init(seed, tid, 0, &m_state); } - template<typename Index> - __device__ double operator()(Index) const { + __device__ double operator()() const { return curand_uniform_double(&m_state); } - template<typename Index, typename PacketType> - __device__ double2 packetOp(Index) const { + template<typename PacketType> + __device__ double2 packetOp() const { EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); return curand_uniform2_double(&m_state); } @@ -638,8 +657,7 @@ template <> class UniformRandomGenerator<std::complex<float> > { const int seed = m_deterministic ? 0 : get_random_seed(); curand_init(seed, tid, 0, &m_state); } - template<typename Index> - __device__ std::complex<float> operator()(Index) const { + __device__ std::complex<float> operator()() const { float4 vals = curand_uniform4(&m_state); return std::complex<float>(vals.x, vals.y); } @@ -664,8 +682,7 @@ template <> class UniformRandomGenerator<std::complex<double> > { const int seed = m_deterministic ? 
0 : get_random_seed(); curand_init(seed, tid, 0, &m_state); } - template<typename Index> - __device__ std::complex<double> operator()(Index) const { + __device__ std::complex<double> operator()() const { double2 vals = curand_uniform2_double(&m_state); return std::complex<double>(vals.x, vals.y); } @@ -701,17 +718,16 @@ template <typename T> class NormalRandomGenerator { } NormalRandomGenerator(const NormalRandomGenerator& other) : m_deterministic(other.m_deterministic), m_distribution(other.m_distribution), m_generator(new std::mt19937()) { - m_generator->seed(other(0) * UINT_MAX); + m_generator->seed(other() * UINT_MAX); } ~NormalRandomGenerator() { delete m_generator; } - template<typename Index> - T operator()(Index) const { + T operator()() const { return m_distribution(*m_generator); } - template<typename Index, typename PacketType> - PacketType packetOp(Index) const { + template<typename PacketType> + PacketType packetOp() const { const int packetSize = internal::unpacket_traits<PacketType>::size; EIGEN_ALIGN_MAX T values[packetSize]; for (int i = 0; i < packetSize; ++i) { @@ -749,12 +765,11 @@ template <> class NormalRandomGenerator<float> { const int seed = m_deterministic ? 0 : get_random_seed(); curand_init(seed, tid, 0, &m_state); } - template<typename Index> - __device__ float operator()(Index) const { + __device__ float operator()() const { return curand_normal(&m_state); } - template<typename Index, typename PacketType> - __device__ float4 packetOp(Index) const { + template<typename PacketType> + __device__ float4 packetOp() const { EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); return curand_normal4(&m_state); } @@ -779,12 +794,11 @@ template <> class NormalRandomGenerator<double> { const int seed = m_deterministic ? 
0 : get_random_seed(); curand_init(seed, tid, 0, &m_state); } - template<typename Index> - __device__ double operator()(Index) const { + __device__ double operator()() const { return curand_normal_double(&m_state); } - template<typename Index, typename PacketType> - __device__ double2 packetOp(Index) const { + template<typename PacketType> + __device__ double2 packetOp() const { EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); return curand_normal2_double(&m_state); } @@ -809,8 +823,7 @@ template <> class NormalRandomGenerator<std::complex<float> > { const int seed = m_deterministic ? 0 : get_random_seed(); curand_init(seed, tid, 0, &m_state); } - template<typename Index> - __device__ std::complex<float> operator()(Index) const { + __device__ std::complex<float> operator()() const { float4 vals = curand_normal4(&m_state); return std::complex<float>(vals.x, vals.y); } @@ -835,8 +848,7 @@ template <> class NormalRandomGenerator<std::complex<double> > { const int seed = m_deterministic ? 
0 : get_random_seed(); curand_init(seed, tid, 0, &m_state); } - template<typename Index> - __device__ std::complex<double> operator()(Index) const { + __device__ std::complex<double> operator()() const { double2 vals = curand_normal2_double(&m_state); return std::complex<double>(vals.x, vals.y); } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 9df697e4c..a87777b22 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -505,9 +505,14 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> (reducing_inner_dims || ReducingInnerMostDims)) { const Index num_values_to_reduce = internal::array_prod(m_reducedDims); const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); - if (!data && num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) { - data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); - m_result = data; + if (!data) { + if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) { + data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); + m_result = data; + } + else { + return true; + } } Op reducer(m_reducer); if (internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { @@ -533,9 +538,14 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> preserving_inner_dims) { const Index num_values_to_reduce = internal::array_prod(m_reducedDims); const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); - if (!data && num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) { - data = 
static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); - m_result = data; + if (!data) { + if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) { + data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); + m_result = data; + } + else { + return true; + } } Op reducer(m_reducer); if (internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { @@ -556,6 +566,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device> m_impl.cleanup(); if (m_result) { m_device.deallocate(m_result); + m_result = NULL; } } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index 5e512490c..65638b6a8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -67,11 +67,21 @@ __device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer) #endif } +// We extend atomicExch to support extra data types +template <typename Type> +__device__ inline Type atomicExchCustom(Type* address, Type val) { + return atomicExch(address, val); +} + +template <> +__device__ inline double atomicExchCustom(double* address, double val) { + unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(address); + return __longlong_as_double(atomicExch(address_as_ull, __double_as_longlong(val))); +} #ifdef EIGEN_HAS_CUDA_FP16 template <template <typename T> class R> __device__ inline void atomicReduce(half2* output, half2 accum, R<half>& reducer) { -#if __CUDA_ARCH__ >= 300 unsigned int oldval = *reinterpret_cast<unsigned int*>(output); unsigned int newval = oldval; reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval)); @@ -87,9 +97,6 @@ __device__ inline void 
atomicReduce(half2* output, half2 accum, R<half>& reducer return; } } -#else - assert(0 && "Shouldn't be called on unsupported device"); -#endif } #endif @@ -130,7 +137,7 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num unsigned int block = atomicCAS(semaphore, 0u, 1u); if (block == 0) { // We're the first block to run, initialize the output value - atomicExch(output, reducer.initialize()); + atomicExchCustom(output, reducer.initialize()); __threadfence(); atomicExch(semaphore, 2u); } @@ -263,17 +270,22 @@ __global__ void ReductionCleanupKernelHalfFloat(Op& reducer, half* output, half2 #endif - -template <typename Self, typename Op, typename OutputType, bool PacketAccess> +template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void> struct FullReductionLauncher { static void run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index) { - assert(false && "Should only be called on floats and half floats"); + assert(false && "Should only be called on doubles, floats and half floats"); } }; -template <typename Self, typename Op, bool PacketAccess> -struct FullReductionLauncher<Self, Op, float, PacketAccess> { - static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs) { +// Specialization for float and double +template <typename Self, typename Op, typename OutputType, bool PacketAccess> +struct FullReductionLauncher< + Self, Op, OutputType, PacketAccess, + typename internal::enable_if< + internal::is_same<float, OutputType>::value || + internal::is_same<double, OutputType>::value, + void>::type> { + static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs) { typedef typename Self::Index Index; typedef typename Self::CoeffReturnType Scalar; const int block_size = 256; @@ -330,20 +342,22 @@ struct FullReductionLauncher<Self, Op, Eigen::half, true> { 
template <typename Self, typename Op, bool Vectorizable> struct FullReducer<Self, Op, GpuDevice, Vectorizable> { // Unfortunately nvidia doesn't support well exotic types such as complex, - // so reduce the scope of the optimized version of the code to the simple case - // of floats and half floats. + // so reduce the scope of the optimized version of the code to the simple cases + // of doubles, floats and half floats #ifdef EIGEN_HAS_CUDA_FP16 static const bool HasOptimizedImplementation = !Op::IsStateful && (internal::is_same<typename Self::CoeffReturnType, float>::value || + internal::is_same<typename Self::CoeffReturnType, double>::value || (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess)); #else static const bool HasOptimizedImplementation = !Op::IsStateful && - internal::is_same<typename Self::CoeffReturnType, float>::value; + (internal::is_same<typename Self::CoeffReturnType, float>::value || + internal::is_same<typename Self::CoeffReturnType, double>::value); #endif template <typename OutputType> static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) { - assert(HasOptimizedImplementation && "Should only be called on floats or half floats"); + assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats"); const Index num_coeffs = array_prod(self.m_impl.dimensions()); // Don't crash when we're called with an input tensor of size 0. 
if (num_coeffs == 0) { @@ -360,6 +374,7 @@ template <int NumPerThread, typename Self, __global__ void InnerReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs, typename Self::CoeffReturnType* output) { #if __CUDA_ARCH__ >= 300 + typedef typename Self::CoeffReturnType Type; eigen_assert(blockDim.y == 1); eigen_assert(blockDim.z == 1); eigen_assert(gridDim.y == 1); @@ -389,13 +404,13 @@ __global__ void InnerReductionKernel(Reducer reducer, const Self input, Index nu const Index col_block = i % input_col_blocks; const Index col_begin = col_block * blockDim.x * NumPerThread + threadIdx.x; - float reduced_val = reducer.initialize(); + Type reduced_val = reducer.initialize(); for (Index j = 0; j < NumPerThread; j += unroll_times) { const Index last_col = col_begin + blockDim.x * (j + unroll_times - 1); if (last_col >= num_coeffs_to_reduce) { for (Index col = col_begin + blockDim.x * j; col < num_coeffs_to_reduce; col += blockDim.x) { - const float val = input.m_impl.coeff(row * num_coeffs_to_reduce + col); + const Type val = input.m_impl.coeff(row * num_coeffs_to_reduce + col); reducer.reduce(val, &reduced_val); } break; @@ -521,17 +536,23 @@ __global__ void InnerReductionKernelHalfFloat(Reducer reducer, const Self input, #endif -template <typename Self, typename Op, typename OutputType, bool PacketAccess> +template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void> struct InnerReductionLauncher { static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index, typename Self::Index) { - assert(false && "Should only be called to reduce floats and half floats on a gpu device"); + assert(false && "Should only be called to reduce doubles, floats and half floats on a gpu device"); return true; } }; -template <typename Self, typename Op, bool PacketAccess> -struct InnerReductionLauncher<Self, Op, float, PacketAccess> { - static bool run(const 
Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) { +// Specialization for float and double +template <typename Self, typename Op, typename OutputType, bool PacketAccess> +struct InnerReductionLauncher< + Self, Op, OutputType, PacketAccess, + typename internal::enable_if< + internal::is_same<float, OutputType>::value || + internal::is_same<double, OutputType>::value, + void>::type> { + static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) { typedef typename Self::Index Index; const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals; @@ -549,7 +570,7 @@ struct InnerReductionLauncher<Self, Op, float, PacketAccess> { const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / 1024; const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks); - LAUNCH_CUDA_KERNEL((ReductionInitKernel<float, Index>), + LAUNCH_CUDA_KERNEL((ReductionInitKernel<OutputType, Index>), num_blocks, 1024, 0, device, reducer.initialize(), num_preserved_vals, output); } @@ -616,15 +637,17 @@ struct InnerReducer<Self, Op, GpuDevice> { #ifdef EIGEN_HAS_CUDA_FP16 static const bool HasOptimizedImplementation = !Op::IsStateful && (internal::is_same<typename Self::CoeffReturnType, float>::value || + internal::is_same<typename Self::CoeffReturnType, double>::value || (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess)); #else static const bool HasOptimizedImplementation = !Op::IsStateful && - internal::is_same<typename Self::CoeffReturnType, float>::value; + (internal::is_same<typename Self::CoeffReturnType, float>::value || + internal::is_same<typename Self::CoeffReturnType, double>::value); #endif template <typename OutputType> static bool run(const Self& self, 
Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) { - assert(HasOptimizedImplementation && "Should only be called on floats or half floats"); + assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats"); const Index num_coeffs = array_prod(self.m_impl.dimensions()); // Don't crash when we're called with an input tensor of size 0. if (num_coeffs == 0) { @@ -675,11 +698,11 @@ struct OuterReducer<Self, Op, GpuDevice> { // so reduce the scope of the optimized version of the code to the simple case // of floats. static const bool HasOptimizedImplementation = !Op::IsStateful && - internal::is_same<typename Self::CoeffReturnType, float>::value; - + (internal::is_same<typename Self::CoeffReturnType, float>::value || + internal::is_same<typename Self::CoeffReturnType, double>::value); template <typename Device, typename OutputType> static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) { - assert(false && "Should only be called to reduce floats on a gpu device"); + assert(false && "Should only be called to reduce doubles or floats on a gpu device"); return true; } diff --git a/unsupported/Eigen/CXX11/src/TensorSymmetry/CMakeLists.txt b/unsupported/Eigen/CXX11/src/TensorSymmetry/CMakeLists.txt deleted file mode 100644 index 6e871a8da..000000000 --- a/unsupported/Eigen/CXX11/src/TensorSymmetry/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -FILE(GLOB Eigen_CXX11_TensorSymmetry_SRCS "*.h") - -INSTALL(FILES - ${Eigen_CXX11_TensorSymmetry_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/TensorSymmetry COMPONENT Devel - ) - -add_subdirectory(util) diff --git a/unsupported/Eigen/CXX11/src/TensorSymmetry/util/CMakeLists.txt b/unsupported/Eigen/CXX11/src/TensorSymmetry/util/CMakeLists.txt deleted file mode 100644 index dc9fc78ec..000000000 --- 
a/unsupported/Eigen/CXX11/src/TensorSymmetry/util/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_CXX11_TensorSymmetry_util_SRCS "*.h") - -INSTALL(FILES - ${Eigen_CXX11_TensorSymmetry_util_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/TensorSymmetry/util COMPONENT Devel - ) diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/CMakeLists.txt b/unsupported/Eigen/CXX11/src/ThreadPool/CMakeLists.txt deleted file mode 100644 index 88fef50c6..000000000 --- a/unsupported/Eigen/CXX11/src/ThreadPool/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_CXX11_ThreadPool_SRCS "*.h") - -INSTALL(FILES - ${Eigen_CXX11_ThreadPool_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/ThreadPool COMPONENT Devel - ) diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h b/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h index 12b80d6c4..71d55552d 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h @@ -50,7 +50,7 @@ class EventCount { public: class Waiter; - EventCount(std::vector<Waiter>& waiters) : waiters_(waiters) { + EventCount(MaxSizeVector<Waiter>& waiters) : waiters_(waiters) { eigen_assert(waiters.size() < (1 << kWaiterBits) - 1); // Initialize epoch to something close to overflow to test overflow. 
state_ = kStackMask | (kEpochMask - kEpochInc * waiters.size() * 2); @@ -199,7 +199,7 @@ class EventCount { static const uint64_t kEpochMask = ((1ull << kEpochBits) - 1) << kEpochShift; static const uint64_t kEpochInc = 1ull << kEpochShift; std::atomic<uint64_t> state_; - std::vector<Waiter>& waiters_; + MaxSizeVector<Waiter>& waiters_; void Park(Waiter* w) { std::unique_lock<std::mutex> lock(w->mu); diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h index 33ae45131..354bce52a 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h @@ -29,6 +29,8 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { spinning_(0), done_(false), ec_(waiters_) { + waiters_.resize(num_threads); + // Calculate coprimes of num_threads. // Coprimes are used for a random walk over all threads in Steal // and NonEmptyQueueIndex. 
Iteration is based on the fact that if we take @@ -123,7 +125,7 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface { MaxSizeVector<Thread*> threads_; MaxSizeVector<Queue*> queues_; MaxSizeVector<unsigned> coprimes_; - std::vector<EventCount::Waiter> waiters_; + MaxSizeVector<EventCount::Waiter> waiters_; std::atomic<unsigned> blocked_; std::atomic<bool> spinning_; std::atomic<bool> done_; diff --git a/unsupported/Eigen/CXX11/src/util/CMakeLists.txt b/unsupported/Eigen/CXX11/src/util/CMakeLists.txt deleted file mode 100644 index 7eab492d6..000000000 --- a/unsupported/Eigen/CXX11/src/util/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_CXX11_util_SRCS "*.h") - -INSTALL(FILES - ${Eigen_CXX11_util_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/util COMPONENT Devel - ) diff --git a/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h b/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h index 961456f10..4bc3dd1ba 100644 --- a/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h +++ b/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h @@ -55,6 +55,17 @@ class MaxSizeVector { internal::aligned_free(data_); } + void resize(size_t n) { + eigen_assert(n <= reserve_); + for (size_t i = size_; i < n; ++i) { + new (&data_[i]) T; + } + for (size_t i = n; i < size_; ++i) { + data_[i].~T(); + } + size_ = n; + } + // Append new elements (up to reserved size). EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void push_back(const T& t) { diff --git a/unsupported/Eigen/EulerAngles b/unsupported/Eigen/EulerAngles new file mode 100644 index 000000000..521fa3f76 --- /dev/null +++ b/unsupported/Eigen/EulerAngles @@ -0,0 +1,43 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Tal Hadad <tal_hd@hotmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_EULERANGLES_MODULE_H +#define EIGEN_EULERANGLES_MODULE_H + + +#include "Eigen/Core" +#include "Eigen/Geometry" + +#include "Eigen/src/Core/util/DisableStupidWarnings.h" + +namespace Eigen { + +/** + * \defgroup EulerAngles_Module EulerAngles module + * \brief This module provides generic euler angles rotation. + * + * Euler angles are a way to represent 3D rotation. + * + * In order to use this module in your code, include this header: + * \code + * #include <unsupported/Eigen/EulerAngles> + * \endcode + * + * See \ref EulerAngles for more information. + * + */ + +} + +#include "src/EulerAngles/EulerSystem.h" +#include "src/EulerAngles/EulerAngles.h" + +#include "Eigen/src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_EULERANGLES_MODULE_H diff --git a/unsupported/Eigen/KroneckerProduct b/unsupported/Eigen/KroneckerProduct index c932c06a6..5f5afb8cf 100644 --- a/unsupported/Eigen/KroneckerProduct +++ b/unsupported/Eigen/KroneckerProduct @@ -13,6 +13,8 @@ #include "../../Eigen/src/Core/util/DisableStupidWarnings.h" +#include "../../Eigen/src/SparseCore/SparseUtil.h" + namespace Eigen { /** diff --git a/unsupported/Eigen/src/AutoDiff/CMakeLists.txt b/unsupported/Eigen/src/AutoDiff/CMakeLists.txt deleted file mode 100644 index ad91fd9c4..000000000 --- a/unsupported/Eigen/src/AutoDiff/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_AutoDiff_SRCS "*.h") - -INSTALL(FILES - ${Eigen_AutoDiff_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/AutoDiff COMPONENT Devel - ) diff --git a/unsupported/Eigen/src/BVH/CMakeLists.txt b/unsupported/Eigen/src/BVH/CMakeLists.txt deleted file mode 100644 index b377d865c..000000000 --- a/unsupported/Eigen/src/BVH/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_BVH_SRCS "*.h") - -INSTALL(FILES - ${Eigen_BVH_SRCS} - DESTINATION 
${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/BVH COMPONENT Devel - ) diff --git a/unsupported/Eigen/src/CMakeLists.txt b/unsupported/Eigen/src/CMakeLists.txt deleted file mode 100644 index f42946793..000000000 --- a/unsupported/Eigen/src/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -ADD_SUBDIRECTORY(AutoDiff) -ADD_SUBDIRECTORY(BVH) -ADD_SUBDIRECTORY(Eigenvalues) -ADD_SUBDIRECTORY(FFT) -ADD_SUBDIRECTORY(IterativeSolvers) -ADD_SUBDIRECTORY(LevenbergMarquardt) -ADD_SUBDIRECTORY(MatrixFunctions) -ADD_SUBDIRECTORY(MoreVectorization) -ADD_SUBDIRECTORY(NonLinearOptimization) -ADD_SUBDIRECTORY(NumericalDiff) -ADD_SUBDIRECTORY(Polynomials) -ADD_SUBDIRECTORY(Skyline) -ADD_SUBDIRECTORY(SparseExtra) -ADD_SUBDIRECTORY(SpecialFunctions) -ADD_SUBDIRECTORY(KroneckerProduct) -ADD_SUBDIRECTORY(Splines) diff --git a/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h b/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h index 3b6a69aff..866a8a460 100644 --- a/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h +++ b/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h @@ -628,15 +628,15 @@ ArpackGeneralizedSelfAdjointEigenSolver<MatrixType, MatrixSolver, BisSPD>& m_info = Success; } - delete select; + delete[] select; } - delete v; - delete iparam; - delete ipntr; - delete workd; - delete workl; - delete resid; + delete[] v; + delete[] iparam; + delete[] ipntr; + delete[] workd; + delete[] workl; + delete[] resid; m_isInitialized = true; diff --git a/unsupported/Eigen/src/Eigenvalues/CMakeLists.txt b/unsupported/Eigen/src/Eigenvalues/CMakeLists.txt deleted file mode 100644 index 1d4387c82..000000000 --- a/unsupported/Eigen/src/Eigenvalues/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Eigenvalues_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Eigenvalues_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/Eigenvalues COMPONENT Devel - ) diff --git a/unsupported/Eigen/src/EulerAngles/CMakeLists.txt 
b/unsupported/Eigen/src/EulerAngles/CMakeLists.txt new file mode 100644 index 000000000..40af550e8 --- /dev/null +++ b/unsupported/Eigen/src/EulerAngles/CMakeLists.txt @@ -0,0 +1,6 @@ +FILE(GLOB Eigen_EulerAngles_SRCS "*.h") + +INSTALL(FILES + ${Eigen_EulerAngles_SRCS} + DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/EulerAngles COMPONENT Devel + ) diff --git a/unsupported/Eigen/src/EulerAngles/EulerAngles.h b/unsupported/Eigen/src/EulerAngles/EulerAngles.h new file mode 100644 index 000000000..13a0da1ab --- /dev/null +++ b/unsupported/Eigen/src/EulerAngles/EulerAngles.h @@ -0,0 +1,386 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Tal Hadad <tal_hd@hotmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_EULERANGLESCLASS_H// TODO: Fix previous "EIGEN_EULERANGLES_H" definition? +#define EIGEN_EULERANGLESCLASS_H + +namespace Eigen +{ + /*template<typename Other, + int OtherRows=Other::RowsAtCompileTime, + int OtherCols=Other::ColsAtCompileTime> + struct ei_eulerangles_assign_impl;*/ + + /** \class EulerAngles + * + * \ingroup EulerAngles_Module + * + * \brief Represents a rotation in a 3 dimensional space as three Euler angles. + * + * Euler rotation is a set of three rotation of three angles over three fixed axes, defined by the EulerSystem given as a template parameter. 
+ * + * Here is how intrinsic Euler angles work: + * - first, rotate the axes system over the alpha axis in angle alpha + * - then, rotate the axes system over the beta axis(which was rotated in the first stage) in angle beta + * - then, rotate the axes system over the gamma axis(which was rotated in the two stages above) in angle gamma + * + * \note This class supports only intrinsic Euler angles for simplicity, + * see EulerSystem how to easily overcome this for extrinsic systems. + * + * ### Rotation representation and conversions ### + * + * It has been proved(see Wikipedia link below) that every rotation can be represented + * by Euler angles, but there is no unique representation (e.g. unlike rotation matrices). + * Therefore, you can convert from Eigen rotations and to them + * (including rotation matrices, which are not called "rotations" by Eigen design). + * + * Euler angles are usually used for: + * - convenient human representation of rotation, especially in interactive GUI. + * - gimbal systems and robotics + * - efficient encoding(i.e. 3 floats only) of rotation for network protocols. + * + * However, Euler angles are slow compared to quaternions or matrices, + * because of their unnatural math definition, although it's simple for humans. + * To overcome this, this class provides easy movement from the math friendly representation + * to the human friendly representation, and vice versa. + * + * All the user needs to do is a safe simple C++ type conversion, + * and this class takes care of the math. + * Additionally, some axes related computation is done in compile time. + * + * #### Euler angles ranges in conversions #### + * + * When converting some rotation to Euler angles, there are some ways you can guarantee + * the Euler angles ranges. + * + * #### implicit ranges #### + * When using implicit ranges, all angles are guaranteed to be in the range [-PI, +PI], + * unless you convert from some other Euler angles. 
+ * In this case, the range is __undefined__ (might be even less than -PI or greater than +2*PI). + * \sa EulerAngles(const MatrixBase<Derived>&) + * \sa EulerAngles(const RotationBase<Derived, 3>&) + * + * #### explicit ranges #### + * When using explicit ranges, all angles are guaranteed to be in the range you choose. + * In the range Boolean parameter, you are asked whether you prefer the positive range or not: + * - _true_ - force the range between [0, +2*PI] + * - _false_ - force the range between [-PI, +PI] + * + * ##### compile time ranges ##### + * This is when you have compile time ranges and you prefer to + * use template parameters. (e.g. for performance) + * \sa FromRotation() + * + * ##### run-time ranges ##### + * Run-time ranges are also supported. + * \sa EulerAngles(const MatrixBase<Derived>&, bool, bool, bool) + * \sa EulerAngles(const RotationBase<Derived, 3>&, bool, bool, bool) + * + * ### Convenient user typedefs ### + * + * Convenient typedefs for EulerAngles exist for float and double scalar, + * in a form of EulerAngles{A}{B}{C}{scalar}, + * e.g. \ref EulerAnglesXYZd, \ref EulerAnglesZYZf. + * + * Only Euler systems with positive axes{+x,+y,+z} have convenient typedefs. + * If you need negative axes{-x,-y,-z}, it is recommended to create your own typedef with + * a name that represents what you need. + * + * ### Example ### + * + * \include EulerAngles.cpp + * Output: \verbinclude EulerAngles.out + * + * ### Additional reading ### + * + * If you want to learn more about how Euler systems work in Eigen, see EulerSystem. + * + * More information about Euler angles: https://en.wikipedia.org/wiki/Euler_angles + * + * \tparam _Scalar the scalar type, i.e., the type of the angles. + * + * \tparam _System the EulerSystem to use, which represents the axes of rotation. 
+ */ + template <typename _Scalar, class _System> + class EulerAngles : public RotationBase<EulerAngles<_Scalar, _System>, 3> + { + public: + /** the scalar type of the angles */ + typedef _Scalar Scalar; + + /** the EulerSystem to use, which represents the axes of rotation. */ + typedef _System System; + + typedef Matrix<Scalar,3,3> Matrix3; /*!< the equivalent rotation matrix type */ + typedef Matrix<Scalar,3,1> Vector3; /*!< the equivalent 3 dimension vector type */ + typedef Quaternion<Scalar> QuaternionType; /*!< the equivalent quaternion type */ + typedef AngleAxis<Scalar> AngleAxisType; /*!< the equivalent angle-axis type */ + + /** \returns the axis vector of the first (alpha) rotation */ + static Vector3 AlphaAxisVector() { + const Vector3& u = Vector3::Unit(System::AlphaAxisAbs - 1); + return System::IsAlphaOpposite ? -u : u; + } + + /** \returns the axis vector of the second (beta) rotation */ + static Vector3 BetaAxisVector() { + const Vector3& u = Vector3::Unit(System::BetaAxisAbs - 1); + return System::IsBetaOpposite ? -u : u; + } + + /** \returns the axis vector of the third (gamma) rotation */ + static Vector3 GammaAxisVector() { + const Vector3& u = Vector3::Unit(System::GammaAxisAbs - 1); + return System::IsGammaOpposite ? -u : u; + } + + private: + Vector3 m_angles; + + public: + /** Default constructor without initialization. */ + EulerAngles() {} + /** Constructs and initialize Euler angles(\p alpha, \p beta, \p gamma). */ + EulerAngles(const Scalar& alpha, const Scalar& beta, const Scalar& gamma) : + m_angles(alpha, beta, gamma) {} + + /** Constructs and initialize Euler angles from a 3x3 rotation matrix \p m. + * + * \note All angles will be in the range [-PI, PI]. + */ + template<typename Derived> + EulerAngles(const MatrixBase<Derived>& m) { *this = m; } + + /** Constructs and initialize Euler angles from a 3x3 rotation matrix \p m, + * with options to choose for each angle the requested range. 
+ * + * If positive range is true, then the specified angle will be in the range [0, +2*PI]. + * Otherwise, the specified angle will be in the range [-PI, +PI]. + * + * \param m The 3x3 rotation matrix to convert + * \param positiveRangeAlpha If true, alpha will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. + * \param positiveRangeBeta If true, beta will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. + * \param positiveRangeGamma If true, gamma will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. + */ + template<typename Derived> + EulerAngles( + const MatrixBase<Derived>& m, + bool positiveRangeAlpha, + bool positiveRangeBeta, + bool positiveRangeGamma) { + + System::CalcEulerAngles(*this, m, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma); + } + + /** Constructs and initialize Euler angles from a rotation \p rot. + * + * \note All angles will be in the range [-PI, PI], unless \p rot is an EulerAngles. + * If rot is an EulerAngles, expected EulerAngles range is __undefined__. + * (Use other functions here for enforcing range if this effect is desired) + */ + template<typename Derived> + EulerAngles(const RotationBase<Derived, 3>& rot) { *this = rot; } + + /** Constructs and initialize Euler angles from a rotation \p rot, + * with options to choose for each angle the requested range. + * + * If positive range is true, then the specified angle will be in the range [0, +2*PI]. + * Otherwise, the specified angle will be in the range [-PI, +PI]. + * + * \param rot The 3x3 rotation matrix to convert + * \param positiveRangeAlpha If true, alpha will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. + * \param positiveRangeBeta If true, beta will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. + * \param positiveRangeGamma If true, gamma will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. 
+ */ + template<typename Derived> + EulerAngles( + const RotationBase<Derived, 3>& rot, + bool positiveRangeAlpha, + bool positiveRangeBeta, + bool positiveRangeGamma) { + + System::CalcEulerAngles(*this, rot.toRotationMatrix(), positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma); + } + + /** \returns The angle values stored in a vector (alpha, beta, gamma). */ + const Vector3& angles() const { return m_angles; } + /** \returns A read-write reference to the angle values stored in a vector (alpha, beta, gamma). */ + Vector3& angles() { return m_angles; } + + /** \returns The value of the first angle. */ + Scalar alpha() const { return m_angles[0]; } + /** \returns A read-write reference to the angle of the first angle. */ + Scalar& alpha() { return m_angles[0]; } + + /** \returns The value of the second angle. */ + Scalar beta() const { return m_angles[1]; } + /** \returns A read-write reference to the angle of the second angle. */ + Scalar& beta() { return m_angles[1]; } + + /** \returns The value of the third angle. */ + Scalar gamma() const { return m_angles[2]; } + /** \returns A read-write reference to the angle of the third angle. */ + Scalar& gamma() { return m_angles[2]; } + + /** \returns The Euler angles rotation inverse (which is as same as the negative), + * (-alpha, -beta, -gamma). + */ + EulerAngles inverse() const + { + EulerAngles res; + res.m_angles = -m_angles; + return res; + } + + /** \returns The Euler angles rotation negative (which is as same as the inverse), + * (-alpha, -beta, -gamma). + */ + EulerAngles operator -() const + { + return inverse(); + } + + /** Constructs and initialize Euler angles from a 3x3 rotation matrix \p m, + * with options to choose for each angle the requested range (__only in compile time__). + * + * If positive range is true, then the specified angle will be in the range [0, +2*PI]. + * Otherwise, the specified angle will be in the range [-PI, +PI]. 
+ * + * \param m The 3x3 rotation matrix to convert + * \tparam PositiveRangeAlpha If true, alpha will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. + * \tparam PositiveRangeBeta If true, beta will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. + * \tparam PositiveRangeGamma If true, gamma will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. + */ + template< + bool PositiveRangeAlpha, + bool PositiveRangeBeta, + bool PositiveRangeGamma, + typename Derived> + static EulerAngles FromRotation(const MatrixBase<Derived>& m) + { + EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Derived, 3, 3) + + EulerAngles e; + System::template CalcEulerAngles< + PositiveRangeAlpha, PositiveRangeBeta, PositiveRangeGamma, _Scalar>(e, m); + return e; + } + + /** Constructs and initialize Euler angles from a rotation \p rot, + * with options to choose for each angle the requested range (__only in compile time__). + * + * If positive range is true, then the specified angle will be in the range [0, +2*PI]. + * Otherwise, the specified angle will be in the range [-PI, +PI]. + * + * \param rot The rotation to convert + * \tparam PositiveRangeAlpha If true, alpha will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. + * \tparam PositiveRangeBeta If true, beta will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. + * \tparam PositiveRangeGamma If true, gamma will be in [0, 2*PI]. Otherwise, in [-PI, +PI]. + */ + template< + bool PositiveRangeAlpha, + bool PositiveRangeBeta, + bool PositiveRangeGamma, + typename Derived> + static EulerAngles FromRotation(const RotationBase<Derived, 3>& rot) + { + return FromRotation<PositiveRangeAlpha, PositiveRangeBeta, PositiveRangeGamma>(rot.toRotationMatrix()); + } + + /*EulerAngles& fromQuaternion(const QuaternionType& q) + { + // TODO: Implement it in a faster way for quaternions + // According to http://www.euclideanspace.com/maths/geometry/rotations/conversions/quaternionToEuler/ + // we can compute only the needed matrix cells and then convert to euler angles. 
(see ZYX example below) + // Currently we compute all matrix cells from quaternion. + + // Special case only for ZYX + //Scalar y2 = q.y() * q.y(); + //m_angles[0] = std::atan2(2*(q.w()*q.z() + q.x()*q.y()), (1 - 2*(y2 + q.z()*q.z()))); + //m_angles[1] = std::asin( 2*(q.w()*q.y() - q.z()*q.x())); + //m_angles[2] = std::atan2(2*(q.w()*q.x() + q.y()*q.z()), (1 - 2*(q.x()*q.x() + y2))); + }*/ + + /** Set \c *this from a rotation matrix(i.e. pure orthogonal matrix with determinant of +1). */ + template<typename Derived> + EulerAngles& operator=(const MatrixBase<Derived>& m) { + EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Derived, 3, 3) + + System::CalcEulerAngles(*this, m); + return *this; + } + + // TODO: Assign and construct from another EulerAngles (with different system) + + /** Set \c *this from a rotation. */ + template<typename Derived> + EulerAngles& operator=(const RotationBase<Derived, 3>& rot) { + System::CalcEulerAngles(*this, rot.toRotationMatrix()); + return *this; + } + + // TODO: Support isApprox function + + /** \returns an equivalent 3x3 rotation matrix. */ + Matrix3 toRotationMatrix() const + { + return static_cast<QuaternionType>(*this).toRotationMatrix(); + } + + /** Convert the Euler angles to quaternion. 
*/ + operator QuaternionType() const + { + return + AngleAxisType(alpha(), AlphaAxisVector()) * + AngleAxisType(beta(), BetaAxisVector()) * + AngleAxisType(gamma(), GammaAxisVector()); + } + + friend std::ostream& operator<<(std::ostream& s, const EulerAngles<Scalar, System>& eulerAngles) + { + s << eulerAngles.angles().transpose(); + return s; + } + }; + +#define EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(AXES, SCALAR_TYPE, SCALAR_POSTFIX) \ + /** \ingroup EulerAngles_Module */ \ + typedef EulerAngles<SCALAR_TYPE, EulerSystem##AXES> EulerAngles##AXES##SCALAR_POSTFIX; + +#define EIGEN_EULER_ANGLES_TYPEDEFS(SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(XYZ, SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(XYX, SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(XZY, SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(XZX, SCALAR_TYPE, SCALAR_POSTFIX) \ + \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(YZX, SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(YZY, SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(YXZ, SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(YXY, SCALAR_TYPE, SCALAR_POSTFIX) \ + \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(ZXY, SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(ZXZ, SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(ZYX, SCALAR_TYPE, SCALAR_POSTFIX) \ + EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(ZYZ, SCALAR_TYPE, SCALAR_POSTFIX) + +EIGEN_EULER_ANGLES_TYPEDEFS(float, f) +EIGEN_EULER_ANGLES_TYPEDEFS(double, d) + + namespace internal + { + template<typename _Scalar, class _System> + struct traits<EulerAngles<_Scalar, _System> > + { + typedef _Scalar Scalar; + }; + } + +} + +#endif // EIGEN_EULERANGLESCLASS_H diff --git a/unsupported/Eigen/src/EulerAngles/EulerSystem.h b/unsupported/Eigen/src/EulerAngles/EulerSystem.h new file mode 100644 index 000000000..82243e643 --- /dev/null +++ 
b/unsupported/Eigen/src/EulerAngles/EulerSystem.h @@ -0,0 +1,316 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Tal Hadad <tal_hd@hotmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_EULERSYSTEM_H +#define EIGEN_EULERSYSTEM_H + +namespace Eigen +{ + // Forward declarations + template <typename _Scalar, class _System> + class EulerAngles; + + namespace internal + { + // TODO: Check if this already exists in the rest of the API + template <int Num, bool IsPositive = (Num > 0)> + struct Abs + { + enum { value = Num }; + }; + + template <int Num> + struct Abs<Num, false> + { + enum { value = -Num }; + }; + + template <int Axis> + struct IsValidAxis + { + enum { value = Axis != 0 && Abs<Axis>::value <= 3 }; + }; + } + + #define EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT(COND,MSG) typedef char static_assertion_##MSG[(COND)?1:-1] + + /** \brief Representation of a fixed signed rotation axis for EulerSystem. + * + * \ingroup EulerAngles_Module + * + * Values here represent: + * - The axis of the rotation: X, Y or Z. + * - The sign (i.e. direction of the rotation along the axis): positive(+) or negative(-) + * + * Therefore, this could express all the axes {+X,+Y,+Z,-X,-Y,-Z} + * + * For positive axis, use +EULER_{axis}, and for negative axis use -EULER_{axis}. + */ + enum EulerAxis + { + EULER_X = 1, /*!< the X axis */ + EULER_Y = 2, /*!< the Y axis */ + EULER_Z = 3 /*!< the Z axis */ + }; + + /** \class EulerSystem + * + * \ingroup EulerAngles_Module + * + * \brief Represents a fixed Euler rotation system. + * + * The goal of this meta-class is to represent the Euler system at compile time, for EulerAngles. 
+ * + * You can use this class to get two things: + * - Build an Euler system, and then pass it as a template parameter to EulerAngles. + * - Query some compile time data about an Euler system. (e.g. Whether it's tait bryan) + * + * Euler rotation is a set of three rotations on fixed axes. (see \ref EulerAngles) + * This meta-class stores those signed axes as compile-time constants. (see \ref EulerAxis) + * + * ### Types of Euler systems ### + * + * All and only valid 3 dimension Euler rotation over standard + * signed axes{+X,+Y,+Z,-X,-Y,-Z} are supported: + * - all axes X, Y, Z in each valid order (see below what order is valid) + * - rotation over the axis is supported both over the positive and negative directions. + * - both tait bryan and proper/classic Euler angles (i.e. the opposite). + * + * Since EulerSystem supports both positive and negative directions, + * you may call this rotation distinction in other names: + * - _right handed_ or _left handed_ + * - _counterclockwise_ or _clockwise_ + * + * Notice that not all axes combinations are valid; an invalid one triggers a static assertion. + * Same unsigned axes can't be neighbors, e.g. {X,X,Y} is invalid. + * This yields two and only two classes: + * - _tait bryan_ - all unsigned axes are distinct, e.g. {X,Y,Z} + * - _proper/classic Euler angles_ - The first and the third unsigned axes are equal, + * and the second is different, e.g. {X,Y,X} + * + * ### Intrinsic vs extrinsic Euler systems ### + * + * Only intrinsic Euler systems are supported for simplicity. + * If you want to use extrinsic Euler systems, + * just use the equal intrinsic opposite order for axes and angles. + * I.e. axes (A,B,C) become (C,B,A), and angles (a,b,c) become (c,b,a). + * + * ### Convenient user typedefs ### + * + * Convenient typedefs for EulerSystem exist (only for positive axes Euler systems), + * in a form of EulerSystem{A}{B}{C}, e.g. \ref EulerSystemXYZ. 
+ * + * ### Additional reading ### + * + * More information about Euler angles: https://en.wikipedia.org/wiki/Euler_angles + * + * \tparam _AlphaAxis the first fixed EulerAxis + * + * \tparam _BetaAxis the second fixed EulerAxis + * + * \tparam _GammaAxis the third fixed EulerAxis + */ + template <int _AlphaAxis, int _BetaAxis, int _GammaAxis> + class EulerSystem + { + public: + // It's defined this way and not as enum, because I think + // that enum is not guaranteed to support negative numbers + + /** The first rotation axis */ + static const int AlphaAxis = _AlphaAxis; + + /** The second rotation axis */ + static const int BetaAxis = _BetaAxis; + + /** The third rotation axis */ + static const int GammaAxis = _GammaAxis; + + enum + { + AlphaAxisAbs = internal::Abs<AlphaAxis>::value, /*!< the first rotation axis unsigned */ + BetaAxisAbs = internal::Abs<BetaAxis>::value, /*!< the second rotation axis unsigned */ + GammaAxisAbs = internal::Abs<GammaAxis>::value, /*!< the third rotation axis unsigned */ + + IsAlphaOpposite = (AlphaAxis < 0) ? 1 : 0, /*!< whether alpha axis is negative */ + IsBetaOpposite = (BetaAxis < 0) ? 1 : 0, /*!< whether beta axis is negative */ + IsGammaOpposite = (GammaAxis < 0) ? 1 : 0, /*!< whether gamma axis is negative */ + + IsOdd = ((AlphaAxisAbs)%3 == (BetaAxisAbs - 1)%3) ? 0 : 1, /*!< whether the Euler system is odd */ + IsEven = IsOdd ? 0 : 1, /*!< whether the Euler system is even */ + + IsTaitBryan = ((unsigned)AlphaAxisAbs != (unsigned)GammaAxisAbs) ? 
1 : 0 /*!< whether the Euler system is tait bryan */ + }; + + private: + + EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT(internal::IsValidAxis<AlphaAxis>::value, + ALPHA_AXIS_IS_INVALID); + + EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT(internal::IsValidAxis<BetaAxis>::value, + BETA_AXIS_IS_INVALID); + + EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT(internal::IsValidAxis<GammaAxis>::value, + GAMMA_AXIS_IS_INVALID); + + EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT((unsigned)AlphaAxisAbs != (unsigned)BetaAxisAbs, + ALPHA_AXIS_CANT_BE_EQUAL_TO_BETA_AXIS); + + EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT((unsigned)BetaAxisAbs != (unsigned)GammaAxisAbs, + BETA_AXIS_CANT_BE_EQUAL_TO_GAMMA_AXIS); + + enum + { + // I, J, K are the pivot indexes permutation for the rotation matrix, that match this Euler system. + // They are used by this class's converters. + // They are always different from each other, and their possible values are: 0, 1, or 2. + I = AlphaAxisAbs - 1, + J = (AlphaAxisAbs - 1 + 1 + IsOdd)%3, + K = (AlphaAxisAbs - 1 + 2 - IsOdd)%3 + }; + + // TODO: Get @mat parameter in form that avoids double evaluation. + template <typename Derived> + static void CalcEulerAngles_imp(Matrix<typename MatrixBase<Derived>::Scalar, 3, 1>& res, const MatrixBase<Derived>& mat, internal::true_type /*isTaitBryan*/) + { + using std::atan2; + using std::sin; + using std::cos; + + typedef typename Derived::Scalar Scalar; + typedef Matrix<Scalar,2,1> Vector2; + + res[0] = atan2(mat(J,K), mat(K,K)); + Scalar c2 = Vector2(mat(I,I), mat(I,J)).norm(); + if((IsOdd && res[0]<Scalar(0)) || ((!IsOdd) && res[0]>Scalar(0))) { + res[0] = (res[0] > Scalar(0)) ? 
res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI); + res[1] = atan2(-mat(I,K), -c2); + } + else + res[1] = atan2(-mat(I,K), c2); + Scalar s1 = sin(res[0]); + Scalar c1 = cos(res[0]); + res[2] = atan2(s1*mat(K,I)-c1*mat(J,I), c1*mat(J,J) - s1 * mat(K,J)); + } + + template <typename Derived> + static void CalcEulerAngles_imp(Matrix<typename MatrixBase<Derived>::Scalar,3,1>& res, const MatrixBase<Derived>& mat, internal::false_type /*isTaitBryan*/) + { + using std::atan2; + using std::sin; + using std::cos; + + typedef typename Derived::Scalar Scalar; + typedef Matrix<Scalar,2,1> Vector2; + + res[0] = atan2(mat(J,I), mat(K,I)); + if((IsOdd && res[0]<Scalar(0)) || ((!IsOdd) && res[0]>Scalar(0))) + { + res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI); + Scalar s2 = Vector2(mat(J,I), mat(K,I)).norm(); + res[1] = -atan2(s2, mat(I,I)); + } + else + { + Scalar s2 = Vector2(mat(J,I), mat(K,I)).norm(); + res[1] = atan2(s2, mat(I,I)); + } + + // With a=(0,1,0), we have i=0; j=1; k=2, and after computing the first two angles, + // we can compute their respective rotation, and apply its inverse to M. 
Since the result must + // be a rotation around x, we have: + // + // c2 s1.s2 c1.s2 1 0 0 + // 0 c1 -s1 * M = 0 c3 s3 + // -s2 s1.c2 c1.c2 0 -s3 c3 + // + // Thus: m11.c1 - m21.s1 = c3 & m12.c1 - m22.s1 = s3 + + Scalar s1 = sin(res[0]); + Scalar c1 = cos(res[0]); + res[2] = atan2(c1*mat(J,K)-s1*mat(K,K), c1*mat(J,J) - s1 * mat(K,J)); + } + + template<typename Scalar> + static void CalcEulerAngles( + EulerAngles<Scalar, EulerSystem>& res, + const typename EulerAngles<Scalar, EulerSystem>::Matrix3& mat) + { + CalcEulerAngles(res, mat, false, false, false); + } + + template< + bool PositiveRangeAlpha, + bool PositiveRangeBeta, + bool PositiveRangeGamma, + typename Scalar> + static void CalcEulerAngles( + EulerAngles<Scalar, EulerSystem>& res, + const typename EulerAngles<Scalar, EulerSystem>::Matrix3& mat) + { + CalcEulerAngles(res, mat, PositiveRangeAlpha, PositiveRangeBeta, PositiveRangeGamma); + } + + template<typename Scalar> + static void CalcEulerAngles( + EulerAngles<Scalar, EulerSystem>& res, + const typename EulerAngles<Scalar, EulerSystem>::Matrix3& mat, + bool PositiveRangeAlpha, + bool PositiveRangeBeta, + bool PositiveRangeGamma) + { + CalcEulerAngles_imp( + res.angles(), mat, + typename internal::conditional<IsTaitBryan, internal::true_type, internal::false_type>::type()); + + if (IsAlphaOpposite == IsOdd) + res.alpha() = -res.alpha(); + + if (IsBetaOpposite == IsOdd) + res.beta() = -res.beta(); + + if (IsGammaOpposite == IsOdd) + res.gamma() = -res.gamma(); + + // Saturate results to the requested range + if (PositiveRangeAlpha && (res.alpha() < 0)) + res.alpha() += Scalar(2 * EIGEN_PI); + + if (PositiveRangeBeta && (res.beta() < 0)) + res.beta() += Scalar(2 * EIGEN_PI); + + if (PositiveRangeGamma && (res.gamma() < 0)) + res.gamma() += Scalar(2 * EIGEN_PI); + } + + template <typename _Scalar, class _System> + friend class Eigen::EulerAngles; + }; + +#define EIGEN_EULER_SYSTEM_TYPEDEF(A, B, C) \ + /** \ingroup EulerAngles_Module */ \ + typedef 
EulerSystem<EULER_##A, EULER_##B, EULER_##C> EulerSystem##A##B##C; + + EIGEN_EULER_SYSTEM_TYPEDEF(X,Y,Z) + EIGEN_EULER_SYSTEM_TYPEDEF(X,Y,X) + EIGEN_EULER_SYSTEM_TYPEDEF(X,Z,Y) + EIGEN_EULER_SYSTEM_TYPEDEF(X,Z,X) + + EIGEN_EULER_SYSTEM_TYPEDEF(Y,Z,X) + EIGEN_EULER_SYSTEM_TYPEDEF(Y,Z,Y) + EIGEN_EULER_SYSTEM_TYPEDEF(Y,X,Z) + EIGEN_EULER_SYSTEM_TYPEDEF(Y,X,Y) + + EIGEN_EULER_SYSTEM_TYPEDEF(Z,X,Y) + EIGEN_EULER_SYSTEM_TYPEDEF(Z,X,Z) + EIGEN_EULER_SYSTEM_TYPEDEF(Z,Y,X) + EIGEN_EULER_SYSTEM_TYPEDEF(Z,Y,Z) +} + +#endif // EIGEN_EULERSYSTEM_H diff --git a/unsupported/Eigen/src/FFT/CMakeLists.txt b/unsupported/Eigen/src/FFT/CMakeLists.txt deleted file mode 100644 index edcffcb18..000000000 --- a/unsupported/Eigen/src/FFT/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_FFT_SRCS "*.h") - -INSTALL(FILES - ${Eigen_FFT_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/FFT COMPONENT Devel - ) diff --git a/unsupported/Eigen/src/IterativeSolvers/CMakeLists.txt b/unsupported/Eigen/src/IterativeSolvers/CMakeLists.txt deleted file mode 100644 index 7986afc5e..000000000 --- a/unsupported/Eigen/src/IterativeSolvers/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_IterativeSolvers_SRCS "*.h") - -INSTALL(FILES - ${Eigen_IterativeSolvers_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/IterativeSolvers COMPONENT Devel - ) diff --git a/unsupported/Eigen/src/KroneckerProduct/CMakeLists.txt b/unsupported/Eigen/src/KroneckerProduct/CMakeLists.txt deleted file mode 100644 index 4daefebee..000000000 --- a/unsupported/Eigen/src/KroneckerProduct/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_KroneckerProduct_SRCS "*.h") - -INSTALL(FILES - ${Eigen_KroneckerProduct_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/KroneckerProduct COMPONENT Devel - ) diff --git a/unsupported/Eigen/src/LevenbergMarquardt/CMakeLists.txt b/unsupported/Eigen/src/LevenbergMarquardt/CMakeLists.txt deleted file mode 100644 index 
d9690854d..000000000 --- a/unsupported/Eigen/src/LevenbergMarquardt/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_LevenbergMarquardt_SRCS "*.h") - -INSTALL(FILES - ${Eigen_LevenbergMarquardt_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/LevenbergMarquardt COMPONENT Devel - ) diff --git a/unsupported/Eigen/src/MatrixFunctions/CMakeLists.txt b/unsupported/Eigen/src/MatrixFunctions/CMakeLists.txt deleted file mode 100644 index cdde64d2c..000000000 --- a/unsupported/Eigen/src/MatrixFunctions/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_MatrixFunctions_SRCS "*.h") - -INSTALL(FILES - ${Eigen_MatrixFunctions_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/MatrixFunctions COMPONENT Devel - ) diff --git a/unsupported/Eigen/src/MoreVectorization/CMakeLists.txt b/unsupported/Eigen/src/MoreVectorization/CMakeLists.txt deleted file mode 100644 index 1b887cc8e..000000000 --- a/unsupported/Eigen/src/MoreVectorization/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_MoreVectorization_SRCS "*.h") - -INSTALL(FILES - ${Eigen_MoreVectorization_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/MoreVectorization COMPONENT Devel - ) diff --git a/unsupported/Eigen/src/NonLinearOptimization/CMakeLists.txt b/unsupported/Eigen/src/NonLinearOptimization/CMakeLists.txt deleted file mode 100644 index 9322ddadf..000000000 --- a/unsupported/Eigen/src/NonLinearOptimization/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_NonLinearOptimization_SRCS "*.h") - -INSTALL(FILES - ${Eigen_NonLinearOptimization_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/NonLinearOptimization COMPONENT Devel - ) diff --git a/unsupported/Eigen/src/NumericalDiff/CMakeLists.txt b/unsupported/Eigen/src/NumericalDiff/CMakeLists.txt deleted file mode 100644 index 1199aca2f..000000000 --- a/unsupported/Eigen/src/NumericalDiff/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB 
Eigen_NumericalDiff_SRCS "*.h") - -INSTALL(FILES - ${Eigen_NumericalDiff_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/NumericalDiff COMPONENT Devel - ) diff --git a/unsupported/Eigen/src/Polynomials/CMakeLists.txt b/unsupported/Eigen/src/Polynomials/CMakeLists.txt deleted file mode 100644 index 51f13f3cb..000000000 --- a/unsupported/Eigen/src/Polynomials/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Polynomials_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Polynomials_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/Polynomials COMPONENT Devel - ) diff --git a/unsupported/Eigen/src/Skyline/CMakeLists.txt b/unsupported/Eigen/src/Skyline/CMakeLists.txt deleted file mode 100644 index 3bf1b0dd4..000000000 --- a/unsupported/Eigen/src/Skyline/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Skyline_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Skyline_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/Skyline COMPONENT Devel - ) diff --git a/unsupported/Eigen/src/SparseExtra/CMakeLists.txt b/unsupported/Eigen/src/SparseExtra/CMakeLists.txt deleted file mode 100644 index 7ea32ca5e..000000000 --- a/unsupported/Eigen/src/SparseExtra/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_SparseExtra_SRCS "*.h") - -INSTALL(FILES - ${Eigen_SparseExtra_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/SparseExtra COMPONENT Devel - ) diff --git a/unsupported/Eigen/src/SpecialFunctions/CMakeLists.txt b/unsupported/Eigen/src/SpecialFunctions/CMakeLists.txt deleted file mode 100644 index 25df9439d..000000000 --- a/unsupported/Eigen/src/SpecialFunctions/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -FILE(GLOB Eigen_SpecialFunctions_SRCS "*.h") -INSTALL(FILES - ${Eigen_SpecialFunctions_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/SpecialFunctions COMPONENT Devel - ) - -FILE(GLOB Eigen_SpecialFunctions_arch_CUDA_SRCS "arch/CUDA/*.h") -INSTALL(FILES - 
${Eigen_SpecialFunctions_arch_CUDA_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/SpecialFunctions/arch/CUDA COMPONENT Devel - )
\ No newline at end of file diff --git a/unsupported/Eigen/src/Splines/CMakeLists.txt b/unsupported/Eigen/src/Splines/CMakeLists.txt deleted file mode 100644 index 55c6271e9..000000000 --- a/unsupported/Eigen/src/Splines/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -FILE(GLOB Eigen_Splines_SRCS "*.h") - -INSTALL(FILES - ${Eigen_Splines_SRCS} - DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/Splines COMPONENT Devel - ) diff --git a/unsupported/doc/examples/EulerAngles.cpp b/unsupported/doc/examples/EulerAngles.cpp new file mode 100644 index 000000000..1ef6aee18 --- /dev/null +++ b/unsupported/doc/examples/EulerAngles.cpp @@ -0,0 +1,46 @@ +#include <unsupported/Eigen/EulerAngles> +#include <iostream> + +using namespace Eigen; + +int main() +{ + // A common Euler system by many armies around the world, + // where the first one is the azimuth(the angle from the north - + // the same angle that is show in compass) + // and the second one is elevation(the angle from the horizon) + // and the third one is roll(the angle between the horizontal body + // direction and the plane ground surface) + // Keep remembering we're using radian angles here! + typedef EulerSystem<-EULER_Z, EULER_Y, EULER_X> MyArmySystem; + typedef EulerAngles<double, MyArmySystem> MyArmyAngles; + + MyArmyAngles vehicleAngles( + 3.14/*PI*/ / 2, /* heading to east, notice that this angle is counter-clockwise */ + -0.3, /* going down from a mountain */ + 0.1); /* slightly rolled to the right */ + + // Some Euler angles representation that our plane use. 
+ EulerAnglesZYZd planeAngles(0.78474, 0.5271, -0.513794); + + MyArmyAngles planeAnglesInMyArmyAngles = MyArmyAngles::FromRotation<true, false, false>(planeAngles); + + std::cout << "vehicle angles(MyArmy): " << vehicleAngles << std::endl; + std::cout << "plane angles(ZYZ): " << planeAngles << std::endl; + std::cout << "plane angles(MyArmy): " << planeAnglesInMyArmyAngles << std::endl; + + // Now lets rotate the plane a little bit + std::cout << "==========================================================\n"; + std::cout << "rotating plane now!\n"; + std::cout << "==========================================================\n"; + + Quaterniond planeRotated = AngleAxisd(-0.342, Vector3d::UnitY()) * planeAngles; + + planeAngles = planeRotated; + planeAnglesInMyArmyAngles = MyArmyAngles::FromRotation<true, false, false>(planeRotated); + + std::cout << "new plane angles(ZYZ): " << planeAngles << std::endl; + std::cout << "new plane angles(MyArmy): " << planeAnglesInMyArmyAngles << std::endl; + + return 0; +} diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index de9b5243a..0d7ed1db2 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -59,6 +59,8 @@ ei_add_test(alignedvector3) ei_add_test(FFT) +ei_add_test(EulerAngles) + find_package(MPFR 2.3.0) find_package(GMP) if(MPFR_FOUND AND EIGEN_COMPILER_SUPPORT_CXX11) @@ -230,20 +232,25 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include") set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu") - ei_add_test(cxx11_tensor_device) - ei_add_test(cxx11_tensor_cuda) - ei_add_test(cxx11_tensor_contract_cuda) + ei_add_test(cxx11_tensor_complex_cuda) ei_add_test(cxx11_tensor_reduction_cuda) ei_add_test(cxx11_tensor_argmax_cuda) ei_add_test(cxx11_tensor_cast_float16_cuda) ei_add_test(cxx11_tensor_scan_cuda) + # Contractions require arch 3.0 or higher + if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 29) + 
ei_add_test(cxx11_tensor_device) + ei_add_test(cxx11_tensor_cuda) + ei_add_test(cxx11_tensor_contract_cuda) + ei_add_test(cxx11_tensor_of_float16_cuda) + endif() + # The random number generation code requires arch 3.5 or greater. if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 34) ei_add_test(cxx11_tensor_random_cuda) endif() - ei_add_test(cxx11_tensor_of_float16_cuda) unset(EIGEN_ADD_TEST_FILENAME_EXTENSION) endif() diff --git a/unsupported/test/EulerAngles.cpp b/unsupported/test/EulerAngles.cpp new file mode 100644 index 000000000..a8cb52864 --- /dev/null +++ b/unsupported/test/EulerAngles.cpp @@ -0,0 +1,208 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Tal Hadad <tal_hd@hotmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include "main.h" + +#include <unsupported/Eigen/EulerAngles> + +using namespace Eigen; + +template<typename EulerSystem, typename Scalar> +void verify_euler_ranged(const Matrix<Scalar,3,1>& ea, + bool positiveRangeAlpha, bool positiveRangeBeta, bool positiveRangeGamma) +{ + typedef EulerAngles<Scalar, EulerSystem> EulerAnglesType; + typedef Matrix<Scalar,3,3> Matrix3; + typedef Matrix<Scalar,3,1> Vector3; + typedef Quaternion<Scalar> QuaternionType; + typedef AngleAxis<Scalar> AngleAxisType; + using std::abs; + + Scalar alphaRangeStart, alphaRangeEnd; + Scalar betaRangeStart, betaRangeEnd; + Scalar gammaRangeStart, gammaRangeEnd; + + if (positiveRangeAlpha) + { + alphaRangeStart = Scalar(0); + alphaRangeEnd = Scalar(2 * EIGEN_PI); + } + else + { + alphaRangeStart = -Scalar(EIGEN_PI); + alphaRangeEnd = Scalar(EIGEN_PI); + } + + if (positiveRangeBeta) + { + betaRangeStart = Scalar(0); + betaRangeEnd = Scalar(2 * EIGEN_PI); + } + else + { + betaRangeStart = -Scalar(EIGEN_PI); + betaRangeEnd = Scalar(EIGEN_PI); + } 
+ + if (positiveRangeGamma) + { + gammaRangeStart = Scalar(0); + gammaRangeEnd = Scalar(2 * EIGEN_PI); + } + else + { + gammaRangeStart = -Scalar(EIGEN_PI); + gammaRangeEnd = Scalar(EIGEN_PI); + } + + const int i = EulerSystem::AlphaAxisAbs - 1; + const int j = EulerSystem::BetaAxisAbs - 1; + const int k = EulerSystem::GammaAxisAbs - 1; + + const int iFactor = EulerSystem::IsAlphaOpposite ? -1 : 1; + const int jFactor = EulerSystem::IsBetaOpposite ? -1 : 1; + const int kFactor = EulerSystem::IsGammaOpposite ? -1 : 1; + + const Vector3 I = EulerAnglesType::AlphaAxisVector(); + const Vector3 J = EulerAnglesType::BetaAxisVector(); + const Vector3 K = EulerAnglesType::GammaAxisVector(); + + EulerAnglesType e(ea[0], ea[1], ea[2]); + + Matrix3 m(e); + Vector3 eabis = EulerAnglesType(m, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma).angles(); + + // Check that eabis in range + VERIFY(alphaRangeStart <= eabis[0] && eabis[0] <= alphaRangeEnd); + VERIFY(betaRangeStart <= eabis[1] && eabis[1] <= betaRangeEnd); + VERIFY(gammaRangeStart <= eabis[2] && eabis[2] <= gammaRangeEnd); + + Vector3 eabis2 = m.eulerAngles(i, j, k); + + // Invert the relevant axes + eabis2[0] *= iFactor; + eabis2[1] *= jFactor; + eabis2[2] *= kFactor; + + // Saturate the angles to the correct range + if (positiveRangeAlpha && (eabis2[0] < 0)) + eabis2[0] += Scalar(2 * EIGEN_PI); + if (positiveRangeBeta && (eabis2[1] < 0)) + eabis2[1] += Scalar(2 * EIGEN_PI); + if (positiveRangeGamma && (eabis2[2] < 0)) + eabis2[2] += Scalar(2 * EIGEN_PI); + + VERIFY_IS_APPROX(eabis, eabis2);// Verify that our estimation is the same as m.eulerAngles() is + + Matrix3 mbis(AngleAxisType(eabis[0], I) * AngleAxisType(eabis[1], J) * AngleAxisType(eabis[2], K)); + VERIFY_IS_APPROX(m, mbis); + + // Tests that are only relevant for no possitive range + if (!(positiveRangeAlpha || positiveRangeBeta || positiveRangeGamma)) + { + /* If I==K, and ea[1]==0, then there no unique solution. 
*/ + /* The remark apply in the case where I!=K, and |ea[1]| is close to pi/2. */ + if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(EIGEN_PI/2),test_precision<Scalar>())) ) + VERIFY((ea-eabis).norm() <= test_precision<Scalar>()); + + // approx_or_less_than does not work for 0 + VERIFY(0 < eabis[0] || test_isMuchSmallerThan(eabis[0], Scalar(1))); + } + + // Quaternions + QuaternionType q(e); + eabis = EulerAnglesType(q, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma).angles(); + VERIFY_IS_APPROX(eabis, eabis2);// Verify that the euler angles are still the same +} + +template<typename EulerSystem, typename Scalar> +void verify_euler(const Matrix<Scalar,3,1>& ea) +{ + verify_euler_ranged<EulerSystem>(ea, false, false, false); + verify_euler_ranged<EulerSystem>(ea, false, false, true); + verify_euler_ranged<EulerSystem>(ea, false, true, false); + verify_euler_ranged<EulerSystem>(ea, false, true, true); + verify_euler_ranged<EulerSystem>(ea, true, false, false); + verify_euler_ranged<EulerSystem>(ea, true, false, true); + verify_euler_ranged<EulerSystem>(ea, true, true, false); + verify_euler_ranged<EulerSystem>(ea, true, true, true); +} + +template<typename Scalar> void check_all_var(const Matrix<Scalar,3,1>& ea) +{ + verify_euler<EulerSystemXYZ>(ea); + verify_euler<EulerSystemXYX>(ea); + verify_euler<EulerSystemXZY>(ea); + verify_euler<EulerSystemXZX>(ea); + + verify_euler<EulerSystemYZX>(ea); + verify_euler<EulerSystemYZY>(ea); + verify_euler<EulerSystemYXZ>(ea); + verify_euler<EulerSystemYXY>(ea); + + verify_euler<EulerSystemZXY>(ea); + verify_euler<EulerSystemZXZ>(ea); + verify_euler<EulerSystemZYX>(ea); + verify_euler<EulerSystemZYZ>(ea); +} + +template<typename Scalar> void eulerangles() +{ + typedef Matrix<Scalar,3,3> Matrix3; + typedef Matrix<Scalar,3,1> Vector3; + typedef Array<Scalar,3,1> Array3; + typedef Quaternion<Scalar> Quaternionx; + typedef AngleAxis<Scalar> AngleAxisType; + + Scalar a = 
internal::random<Scalar>(-Scalar(EIGEN_PI), Scalar(EIGEN_PI)); + Quaternionx q1; + q1 = AngleAxisType(a, Vector3::Random().normalized()); + Matrix3 m; + m = q1; + + Vector3 ea = m.eulerAngles(0,1,2); + check_all_var(ea); + ea = m.eulerAngles(0,1,0); + check_all_var(ea); + + // Check with purely random Quaternion: + q1.coeffs() = Quaternionx::Coefficients::Random().normalized(); + m = q1; + ea = m.eulerAngles(0,1,2); + check_all_var(ea); + ea = m.eulerAngles(0,1,0); + check_all_var(ea); + + // Check with random angles in range [0:pi]x[-pi:pi]x[-pi:pi]. + ea = (Array3::Random() + Array3(1,0,0))*Scalar(EIGEN_PI)*Array3(0.5,1,1); + check_all_var(ea); + + ea[2] = ea[0] = internal::random<Scalar>(0,Scalar(EIGEN_PI)); + check_all_var(ea); + + ea[0] = ea[1] = internal::random<Scalar>(0,Scalar(EIGEN_PI)); + check_all_var(ea); + + ea[1] = 0; + check_all_var(ea); + + ea.head(2).setZero(); + check_all_var(ea); + + ea.setZero(); + check_all_var(ea); +} + +void test_EulerAngles() +{ + for(int i = 0; i < g_repeat; i++) { + CALL_SUBTEST_1( eulerangles<float>() ); + CALL_SUBTEST_2( eulerangles<double>() ); + } +} diff --git a/unsupported/test/cxx11_eventcount.cpp b/unsupported/test/cxx11_eventcount.cpp index f16cc6f07..3b598bf42 100644 --- a/unsupported/test/cxx11_eventcount.cpp +++ b/unsupported/test/cxx11_eventcount.cpp @@ -25,7 +25,8 @@ int rand_reentrant(unsigned int* s) { static void test_basic_eventcount() { - std::vector<EventCount::Waiter> waiters(1); + MaxSizeVector<EventCount::Waiter> waiters(1); + waiters.resize(1); EventCount ec(waiters); EventCount::Waiter& w = waiters[0]; ec.Notify(false); @@ -81,7 +82,8 @@ static void test_stress_eventcount() static const int kEvents = 1 << 16; static const int kQueues = 10; - std::vector<EventCount::Waiter> waiters(kThreads); + MaxSizeVector<EventCount::Waiter> waiters(kThreads); + waiters.resize(kThreads); EventCount ec(waiters); TestQueue queues[kQueues]; diff --git a/unsupported/test/cxx11_tensor_argmax_cuda.cu 
b/unsupported/test/cxx11_tensor_argmax_cuda.cu index 41ccbe974..6fe8982f2 100644 --- a/unsupported/test/cxx11_tensor_argmax_cuda.cu +++ b/unsupported/test/cxx11_tensor_argmax_cuda.cu @@ -12,6 +12,9 @@ #define EIGEN_TEST_FUNC cxx11_tensor_cuda #define EIGEN_USE_GPU +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> diff --git a/unsupported/test/cxx11_tensor_cast_float16_cuda.cu b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu index f22b99de8..88c233994 100644 --- a/unsupported/test/cxx11_tensor_cast_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu @@ -13,7 +13,9 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU - +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> diff --git a/unsupported/test/cxx11_tensor_complex_cuda.cu b/unsupported/test/cxx11_tensor_complex_cuda.cu new file mode 100644 index 000000000..74befe670 --- /dev/null +++ b/unsupported/test/cxx11_tensor_complex_cuda.cu @@ -0,0 +1,78 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com> +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_FUNC cxx11_tensor_complex +#define EIGEN_USE_GPU + +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif +#include "main.h" +#include <unsupported/Eigen/CXX11/Tensor> + +using Eigen::Tensor; + +void test_cuda_nullary() { + Tensor<std::complex<float>, 1, 0, int> in1(2); + Tensor<std::complex<float>, 1, 0, int> in2(2); + in1.setRandom(); + in2.setRandom(); + + std::size_t float_bytes = in1.size() * sizeof(float); + std::size_t complex_bytes = in1.size() * sizeof(std::complex<float>); + + std::complex<float>* d_in1; + std::complex<float>* d_in2; + float* d_out2; + cudaMalloc((void**)(&d_in1), complex_bytes); + cudaMalloc((void**)(&d_in2), complex_bytes); + cudaMalloc((void**)(&d_out2), float_bytes); + cudaMemcpy(d_in1, in1.data(), complex_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in2, in2.data(), complex_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<std::complex<float>, 1, 0, int>, Eigen::Aligned> gpu_in1( + d_in1, 2); + Eigen::TensorMap<Eigen::Tensor<std::complex<float>, 1, 0, int>, Eigen::Aligned> gpu_in2( + d_in2, 2); + Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_out2( + d_out2, 2); + + gpu_in1.device(gpu_device) = gpu_in1.constant(std::complex<float>(3.14f, 2.7f)); + gpu_out2.device(gpu_device) = gpu_in2.abs(); + + Tensor<std::complex<float>, 1, 0, int> new1(2); + Tensor<float, 1, 0, int> new2(2); + + assert(cudaMemcpyAsync(new1.data(), d_in1, complex_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaMemcpyAsync(new2.data(), d_out2, float_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i = 0; i < 2; ++i) { + VERIFY_IS_APPROX(new1(i), std::complex<float>(3.14f, 2.7f)); + VERIFY_IS_APPROX(new2(i), 
std::abs(in2(i))); + } + + cudaFree(d_in1); + cudaFree(d_in2); + cudaFree(d_out2); +} + + + +void test_cxx11_tensor_complex() +{ + CALL_SUBTEST(test_cuda_nullary()); +} diff --git a/unsupported/test/cxx11_tensor_contract_cuda.cu b/unsupported/test/cxx11_tensor_contract_cuda.cu index 98ac180ef..767e9c678 100644 --- a/unsupported/test/cxx11_tensor_contract_cuda.cu +++ b/unsupported/test/cxx11_tensor_contract_cuda.cu @@ -14,7 +14,9 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU - +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> diff --git a/unsupported/test/cxx11_tensor_contraction.cpp b/unsupported/test/cxx11_tensor_contraction.cpp index 73623b2ed..ace97057f 100644 --- a/unsupported/test/cxx11_tensor_contraction.cpp +++ b/unsupported/test/cxx11_tensor_contraction.cpp @@ -489,6 +489,27 @@ static void test_tensor_product() } +template<int DataLayout> +static void test_const_inputs() +{ + Tensor<float, 2, DataLayout> in1(2, 3); + Tensor<float, 2, DataLayout> in2(3, 2); + in1.setRandom(); + in2.setRandom(); + + TensorMap<Tensor<const float, 2, DataLayout> > mat1(in1.data(), 2, 3); + TensorMap<Tensor<const float, 2, DataLayout> > mat2(in2.data(), 3, 2); + Tensor<float, 2, DataLayout> mat3(2,2); + + Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}}; + mat3 = mat1.contract(mat2, dims); + + VERIFY_IS_APPROX(mat3(0,0), mat1(0,0)*mat2(0,0) + mat1(0,1)*mat2(1,0) + mat1(0,2)*mat2(2,0)); + VERIFY_IS_APPROX(mat3(0,1), mat1(0,0)*mat2(0,1) + mat1(0,1)*mat2(1,1) + mat1(0,2)*mat2(2,1)); + VERIFY_IS_APPROX(mat3(1,0), mat1(1,0)*mat2(0,0) + mat1(1,1)*mat2(1,0) + mat1(1,2)*mat2(2,0)); + VERIFY_IS_APPROX(mat3(1,1), mat1(1,0)*mat2(0,1) + mat1(1,1)*mat2(1,1) + mat1(1,2)*mat2(2,1)); +} + void test_cxx11_tensor_contraction() { CALL_SUBTEST(test_evals<ColMajor>()); @@ -519,4 +540,6 @@ void test_cxx11_tensor_contraction() CALL_SUBTEST(test_small_blocking_factors<RowMajor>()); 
CALL_SUBTEST(test_tensor_product<ColMajor>()); CALL_SUBTEST(test_tensor_product<RowMajor>()); + CALL_SUBTEST(test_const_inputs<ColMajor>()); + CALL_SUBTEST(test_const_inputs<RowMajor>()); } diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu index 284b46803..bf216587a 100644 --- a/unsupported/test/cxx11_tensor_cuda.cu +++ b/unsupported/test/cxx11_tensor_cuda.cu @@ -10,19 +10,65 @@ #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX #define EIGEN_TEST_FUNC cxx11_tensor_cuda -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU - +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> using Eigen::Tensor; +void test_cuda_nullary() { + Tensor<float, 1, 0, int> in1(2); + Tensor<float, 1, 0, int> in2(2); + in1.setRandom(); + in2.setRandom(); + + std::size_t tensor_bytes = in1.size() * sizeof(float); + + float* d_in1; + float* d_in2; + cudaMalloc((void**)(&d_in1), tensor_bytes); + cudaMalloc((void**)(&d_in2), tensor_bytes); + cudaMemcpy(d_in1, in1.data(), tensor_bytes, cudaMemcpyHostToDevice); + cudaMemcpy(d_in2, in2.data(), tensor_bytes, cudaMemcpyHostToDevice); + + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + + Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_in1( + d_in1, 2); + Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_in2( + d_in2, 2); + + gpu_in1.device(gpu_device) = gpu_in1.constant(3.14f); + gpu_in2.device(gpu_device) = gpu_in2.random(); + + Tensor<float, 1, 0, int> new1(2); + Tensor<float, 1, 0, int> new2(2); + + assert(cudaMemcpyAsync(new1.data(), d_in1, tensor_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + assert(cudaMemcpyAsync(new2.data(), d_in2, tensor_bytes, cudaMemcpyDeviceToHost, + gpu_device.stream()) == cudaSuccess); + + assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess); + + for (int i 
= 0; i < 2; ++i) { + VERIFY_IS_APPROX(new1(i), 3.14f); + VERIFY_IS_NOT_EQUAL(new2(i), in2(i)); + } + + cudaFree(d_in1); + cudaFree(d_in2); +} + void test_cuda_elementwise_small() { - Tensor<float, 1> in1(Eigen::array<int, 1>(2)); - Tensor<float, 1> in2(Eigen::array<int, 1>(2)); - Tensor<float, 1> out(Eigen::array<int, 1>(2)); + Tensor<float, 1> in1(Eigen::array<Eigen::DenseIndex, 1>(2)); + Tensor<float, 1> in2(Eigen::array<Eigen::DenseIndex, 1>(2)); + Tensor<float, 1> out(Eigen::array<Eigen::DenseIndex, 1>(2)); in1.setRandom(); in2.setRandom(); @@ -44,11 +90,11 @@ void test_cuda_elementwise_small() { Eigen::GpuDevice gpu_device(&stream); Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in1( - d_in1, Eigen::array<int, 1>(2)); + d_in1, Eigen::array<Eigen::DenseIndex, 1>(2)); Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in2( - d_in2, Eigen::array<int, 1>(2)); + d_in2, Eigen::array<Eigen::DenseIndex, 1>(2)); Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_out( - d_out, Eigen::array<int, 1>(2)); + d_out, Eigen::array<Eigen::DenseIndex, 1>(2)); gpu_out.device(gpu_device) = gpu_in1 + gpu_in2; @@ -58,8 +104,8 @@ void test_cuda_elementwise_small() { for (int i = 0; i < 2; ++i) { VERIFY_IS_APPROX( - out(Eigen::array<int, 1>(i)), - in1(Eigen::array<int, 1>(i)) + in2(Eigen::array<int, 1>(i))); + out(Eigen::array<Eigen::DenseIndex, 1>(i)), + in1(Eigen::array<Eigen::DenseIndex, 1>(i)) + in2(Eigen::array<Eigen::DenseIndex, 1>(i))); } cudaFree(d_in1); @@ -69,10 +115,10 @@ void test_cuda_elementwise_small() { void test_cuda_elementwise() { - Tensor<float, 3> in1(Eigen::array<int, 3>(72,53,97)); - Tensor<float, 3> in2(Eigen::array<int, 3>(72,53,97)); - Tensor<float, 3> in3(Eigen::array<int, 3>(72,53,97)); - Tensor<float, 3> out(Eigen::array<int, 3>(72,53,97)); + Tensor<float, 3> in1(Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + Tensor<float, 3> in2(Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + Tensor<float, 3> 
in3(Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + Tensor<float, 3> out(Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); in1.setRandom(); in2.setRandom(); in3.setRandom(); @@ -98,10 +144,10 @@ void test_cuda_elementwise() Eigen::CudaStreamDevice stream; Eigen::GpuDevice gpu_device(&stream); - Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, Eigen::array<int, 3>(72,53,97)); - Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, Eigen::array<int, 3>(72,53,97)); - Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in3(d_in3, Eigen::array<int, 3>(72,53,97)); - Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<int, 3>(72,53,97)); + Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in3(d_in3, Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); + Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<Eigen::DenseIndex, 3>(72,53,97)); gpu_out.device(gpu_device) = gpu_in1 + gpu_in2 * gpu_in3; @@ -111,7 +157,7 @@ void test_cuda_elementwise() for (int i = 0; i < 72; ++i) { for (int j = 0; j < 53; ++j) { for (int k = 0; k < 97; ++k) { - VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), in1(Eigen::array<int, 3>(i,j,k)) + in2(Eigen::array<int, 3>(i,j,k)) * in3(Eigen::array<int, 3>(i,j,k))); + VERIFY_IS_APPROX(out(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)), in1(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)) + in2(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)) * in3(Eigen::array<Eigen::DenseIndex, 3>(i,j,k))); } } } @@ -181,7 +227,7 @@ void test_cuda_reduction() Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_in1(d_in1, 72,53,97,113); Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97); - array<int, 2> reduction_axis; + array<Eigen::DenseIndex, 2> reduction_axis; reduction_axis[0] = 1; reduction_axis[1] = 3; @@ -214,8 +260,8 @@ 
void test_cuda_contraction() // more than 30 * 1024, which is the number of threads in blocks on // a 15 SM GK110 GPU Tensor<float, 4, DataLayout> t_left(6, 50, 3, 31); - Tensor<float, 5, DataLayout> t_right(Eigen::array<int, 5>(3, 31, 7, 20, 1)); - Tensor<float, 5, DataLayout> t_result(Eigen::array<int, 5>(6, 50, 7, 20, 1)); + Tensor<float, 5, DataLayout> t_right(Eigen::array<Eigen::DenseIndex, 5>(3, 31, 7, 20, 1)); + Tensor<float, 5, DataLayout> t_result(Eigen::array<Eigen::DenseIndex, 5>(6, 50, 7, 20, 1)); t_left.setRandom(); t_right.setRandom(); @@ -299,7 +345,7 @@ void test_cuda_convolution_1d() Eigen::TensorMap<Eigen::Tensor<float, 1, DataLayout> > gpu_kernel(d_kernel, 4); Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out, 74,34,11,137); - Eigen::array<int, 1> dims(1); + Eigen::array<Eigen::DenseIndex, 1> dims(1); gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); @@ -352,7 +398,7 @@ void test_cuda_convolution_inner_dim_col_major_1d() Eigen::TensorMap<Eigen::Tensor<float, 1, ColMajor> > gpu_kernel(d_kernel,4); Eigen::TensorMap<Eigen::Tensor<float, 4, ColMajor> > gpu_out(d_out,71,9,11,7); - Eigen::array<int, 1> dims(0); + Eigen::array<Eigen::DenseIndex, 1> dims(0); gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); @@ -405,7 +451,7 @@ void test_cuda_convolution_inner_dim_row_major_1d() Eigen::TensorMap<Eigen::Tensor<float, 1, RowMajor> > gpu_kernel(d_kernel, 4); Eigen::TensorMap<Eigen::Tensor<float, 4, RowMajor> > gpu_out(d_out, 7,9,11,71); - Eigen::array<int, 1> dims(3); + Eigen::array<Eigen::DenseIndex, 1> dims(3); gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == 
cudaSuccess); @@ -459,7 +505,7 @@ void test_cuda_convolution_2d() Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > gpu_kernel(d_kernel,3,4); Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out,74,35,8,137); - Eigen::array<int, 2> dims(1,2); + Eigen::array<Eigen::DenseIndex, 2> dims(1,2); gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); @@ -496,9 +542,9 @@ void test_cuda_convolution_2d() template<int DataLayout> void test_cuda_convolution_3d() { - Tensor<float, 5, DataLayout> input(Eigen::array<int, 5>(74,37,11,137,17)); + Tensor<float, 5, DataLayout> input(Eigen::array<Eigen::DenseIndex, 5>(74,37,11,137,17)); Tensor<float, 3, DataLayout> kernel(3,4,2); - Tensor<float, 5, DataLayout> out(Eigen::array<int, 5>(74,35,8,136,17)); + Tensor<float, 5, DataLayout> out(Eigen::array<Eigen::DenseIndex, 5>(74,35,8,136,17)); input = input.constant(10.0f) + input.random(); kernel = kernel.constant(7.0f) + kernel.random(); @@ -523,7 +569,7 @@ void test_cuda_convolution_3d() Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> > gpu_kernel(d_kernel,3,4,2); Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_out(d_out,74,35,8,136,17); - Eigen::array<int, 3> dims(1,2,3); + Eigen::array<Eigen::DenseIndex, 3> dims(1,2,3); gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims); assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess); @@ -1168,6 +1214,7 @@ void test_cuda_betainc() void test_cxx11_tensor_cuda() { + CALL_SUBTEST_1(test_cuda_nullary()); CALL_SUBTEST_1(test_cuda_elementwise_small()); CALL_SUBTEST_1(test_cuda_elementwise()); CALL_SUBTEST_1(test_cuda_props()); diff --git a/unsupported/test/cxx11_tensor_device.cu b/unsupported/test/cxx11_tensor_device.cu index b6ca54d93..fde20ddf2 100644 --- a/unsupported/test/cxx11_tensor_device.cu +++ 
b/unsupported/test/cxx11_tensor_device.cu @@ -13,7 +13,9 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU - +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu index 2f55f9361..cbf401c86 100644 --- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu @@ -13,7 +13,9 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU - +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> @@ -181,30 +183,39 @@ void test_cuda_trancendental() { float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float)); float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float)); + float* d_float3 = (float*)gpu_device.allocate(num_elem * sizeof(float)); Eigen::half* d_res1_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); Eigen::half* d_res1_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); Eigen::half* d_res2_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); Eigen::half* d_res2_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); - - Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1( - d_float1, num_elem); - Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2( - d_float2, num_elem); - Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_half( - d_res1_half, num_elem); - Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_float( - d_res1_float, num_elem); - Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_half( - d_res2_half, num_elem); - Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> 
gpu_res2_float( - d_res2_float, num_elem); + Eigen::half* d_res3_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + Eigen::half* d_res3_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half)); + + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(d_float1, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(d_float2, num_elem); + Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float3(d_float3, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_half(d_res1_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_float(d_res1_float, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_half(d_res2_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_float(d_res2_float, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_half(d_res3_half, num_elem); + Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_float(d_res3_float, num_elem); gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f); gpu_float2.device(gpu_device) = gpu_float2.random() + gpu_float1.constant(0.5f); + gpu_float3.device(gpu_device) = gpu_float3.random(); gpu_res1_float.device(gpu_device) = gpu_float1.exp().cast<Eigen::half>(); gpu_res2_float.device(gpu_device) = gpu_float2.log().cast<Eigen::half>(); - gpu_res1_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().exp(); - gpu_res2_half.device(gpu_device) = gpu_float2.cast<Eigen::half>().log(); + gpu_res3_float.device(gpu_device) = gpu_float3.log1p().cast<Eigen::half>(); + + gpu_res1_half.device(gpu_device) = gpu_float1.cast<Eigen::half>(); + gpu_res1_half.device(gpu_device) = gpu_res1_half.exp(); + + gpu_res2_half.device(gpu_device) = gpu_float2.cast<Eigen::half>(); + gpu_res2_half.device(gpu_device) = gpu_res2_half.log(); + + 
gpu_res3_half.device(gpu_device) = gpu_float3.cast<Eigen::half>(); + gpu_res3_half.device(gpu_device) = gpu_res3_half.log1p(); Tensor<float, 1> input1(num_elem); Tensor<Eigen::half, 1> half_prec1(num_elem); @@ -212,12 +223,18 @@ void test_cuda_trancendental() { Tensor<float, 1> input2(num_elem); Tensor<Eigen::half, 1> half_prec2(num_elem); Tensor<Eigen::half, 1> full_prec2(num_elem); + Tensor<float, 1> input3(num_elem); + Tensor<Eigen::half, 1> half_prec3(num_elem); + Tensor<Eigen::half, 1> full_prec3(num_elem); gpu_device.memcpyDeviceToHost(input1.data(), d_float1, num_elem*sizeof(float)); gpu_device.memcpyDeviceToHost(input2.data(), d_float2, num_elem*sizeof(float)); + gpu_device.memcpyDeviceToHost(input3.data(), d_float3, num_elem*sizeof(float)); gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res1_half, num_elem*sizeof(Eigen::half)); gpu_device.memcpyDeviceToHost(full_prec1.data(), d_res1_float, num_elem*sizeof(Eigen::half)); gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res2_half, num_elem*sizeof(Eigen::half)); gpu_device.memcpyDeviceToHost(full_prec2.data(), d_res2_float, num_elem*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(half_prec3.data(), d_res3_half, num_elem*sizeof(Eigen::half)); + gpu_device.memcpyDeviceToHost(full_prec3.data(), d_res3_float, num_elem*sizeof(Eigen::half)); gpu_device.synchronize(); for (int i = 0; i < num_elem; ++i) { @@ -231,12 +248,19 @@ void test_cuda_trancendental() { else VERIFY_IS_APPROX(full_prec2(i), half_prec2(i)); } + for (int i = 0; i < num_elem; ++i) { + std::cout << "Checking elemwise plog1 " << i << " input = " << input3(i) << " full = " << full_prec3(i) << " half = " << half_prec3(i) << std::endl; + VERIFY_IS_APPROX(full_prec3(i), half_prec3(i)); + } gpu_device.deallocate(d_float1); gpu_device.deallocate(d_float2); + gpu_device.deallocate(d_float3); gpu_device.deallocate(d_res1_half); gpu_device.deallocate(d_res1_float); gpu_device.deallocate(d_res2_half); gpu_device.deallocate(d_res2_float); + 
gpu_device.deallocate(d_res3_float); + gpu_device.deallocate(d_res3_half); } template<typename> diff --git a/unsupported/test/cxx11_tensor_random_cuda.cu b/unsupported/test/cxx11_tensor_random_cuda.cu index fa1a46732..b3be199e1 100644 --- a/unsupported/test/cxx11_tensor_random_cuda.cu +++ b/unsupported/test/cxx11_tensor_random_cuda.cu @@ -13,6 +13,9 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif #include "main.h" #include <Eigen/CXX11/Tensor> diff --git a/unsupported/test/cxx11_tensor_reduction_cuda.cu b/unsupported/test/cxx11_tensor_reduction_cuda.cu index cad0c08e0..6858b43a7 100644 --- a/unsupported/test/cxx11_tensor_reduction_cuda.cu +++ b/unsupported/test/cxx11_tensor_reduction_cuda.cu @@ -12,11 +12,14 @@ #define EIGEN_TEST_FUNC cxx11_tensor_reduction_cuda #define EIGEN_USE_GPU +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> -template<int DataLayout> +template<typename Type, int DataLayout> static void test_full_reductions() { Eigen::CudaStreamDevice stream; @@ -25,24 +28,24 @@ static void test_full_reductions() { const int num_rows = internal::random<int>(1024, 5*1024); const int num_cols = internal::random<int>(1024, 5*1024); - Tensor<float, 2, DataLayout> in(num_rows, num_cols); + Tensor<Type, 2, DataLayout> in(num_rows, num_cols); in.setRandom(); - Tensor<float, 0, DataLayout> full_redux; + Tensor<Type, 0, DataLayout> full_redux; full_redux = in.sum(); - std::size_t in_bytes = in.size() * sizeof(float); - std::size_t out_bytes = full_redux.size() * sizeof(float); - float* gpu_in_ptr = static_cast<float*>(gpu_device.allocate(in_bytes)); - float* gpu_out_ptr = static_cast<float*>(gpu_device.allocate(out_bytes)); + std::size_t in_bytes = in.size() * sizeof(Type); + std::size_t out_bytes = full_redux.size() * sizeof(Type); + Type* gpu_in_ptr = 
static_cast<Type*>(gpu_device.allocate(in_bytes)); + Type* gpu_out_ptr = static_cast<Type*>(gpu_device.allocate(out_bytes)); gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes); - TensorMap<Tensor<float, 2, DataLayout> > in_gpu(gpu_in_ptr, num_rows, num_cols); - TensorMap<Tensor<float, 0, DataLayout> > out_gpu(gpu_out_ptr); + TensorMap<Tensor<Type, 2, DataLayout> > in_gpu(gpu_in_ptr, num_rows, num_cols); + TensorMap<Tensor<Type, 0, DataLayout> > out_gpu(gpu_out_ptr); out_gpu.device(gpu_device) = in_gpu.sum(); - Tensor<float, 0, DataLayout> full_redux_gpu; + Tensor<Type, 0, DataLayout> full_redux_gpu; gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes); gpu_device.synchronize(); @@ -53,7 +56,102 @@ static void test_full_reductions() { gpu_device.deallocate(gpu_out_ptr); } +template<typename Type, int DataLayout> +static void test_first_dim_reductions() { + int dim_x = 33; + int dim_y = 1; + int dim_z = 128; + + Tensor<Type, 3, DataLayout> in(dim_x, dim_y, dim_z); + in.setRandom(); + + Eigen::array<int, 1> red_axis; + red_axis[0] = 0; + Tensor<Type, 2, DataLayout> redux = in.sum(red_axis); + + // Create device + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice dev(&stream); + + // Create data(T) + Type* in_data = (Type*)dev.allocate(dim_x*dim_y*dim_z*sizeof(Type)); + Type* out_data = (Type*)dev.allocate(dim_z*dim_y*sizeof(Type)); + Eigen::TensorMap<Eigen::Tensor<Type, 3, DataLayout> > gpu_in(in_data, dim_x, dim_y, dim_z); + Eigen::TensorMap<Eigen::Tensor<Type, 2, DataLayout> > gpu_out(out_data, dim_y, dim_z); + + // Perform operation + dev.memcpyHostToDevice(in_data, in.data(), in.size()*sizeof(Type)); + gpu_out.device(dev) = gpu_in.sum(red_axis); + gpu_out.device(dev) += gpu_in.sum(red_axis); + Tensor<Type, 2, DataLayout> redux_gpu(dim_y, dim_z); + dev.memcpyDeviceToHost(redux_gpu.data(), out_data, gpu_out.size()*sizeof(Type)); + dev.synchronize(); + + // Check that the CPU and GPU reductions return the same result. 
+ for (int i = 0; i < gpu_out.size(); ++i) { + VERIFY_IS_APPROX(2*redux(i), redux_gpu(i)); + } + + dev.deallocate(in_data); + dev.deallocate(out_data); +} + +template<typename Type, int DataLayout> +static void test_last_dim_reductions() { + int dim_x = 128; + int dim_y = 1; + int dim_z = 33; + + Tensor<Type, 3, DataLayout> in(dim_x, dim_y, dim_z); + in.setRandom(); + + Eigen::array<int, 1> red_axis; + red_axis[0] = 2; + Tensor<Type, 2, DataLayout> redux = in.sum(red_axis); + + // Create device + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice dev(&stream); + + // Create data + Type* in_data = (Type*)dev.allocate(dim_x*dim_y*dim_z*sizeof(Type)); + Type* out_data = (Type*)dev.allocate(dim_x*dim_y*sizeof(Type)); + Eigen::TensorMap<Eigen::Tensor<Type, 3, DataLayout> > gpu_in(in_data, dim_x, dim_y, dim_z); + Eigen::TensorMap<Eigen::Tensor<Type, 2, DataLayout> > gpu_out(out_data, dim_x, dim_y); + + // Perform operation + dev.memcpyHostToDevice(in_data, in.data(), in.size()*sizeof(Type)); + gpu_out.device(dev) = gpu_in.sum(red_axis); + gpu_out.device(dev) += gpu_in.sum(red_axis); + Tensor<Type, 2, DataLayout> redux_gpu(dim_x, dim_y); + dev.memcpyDeviceToHost(redux_gpu.data(), out_data, gpu_out.size()*sizeof(Type)); + dev.synchronize(); + + // Check that the CPU and GPU reductions return the same result. 
+ for (int i = 0; i < gpu_out.size(); ++i) { + VERIFY_IS_APPROX(2*redux(i), redux_gpu(i)); + } + + dev.deallocate(in_data); + dev.deallocate(out_data); +} + + void test_cxx11_tensor_reduction_cuda() { - CALL_SUBTEST_1(test_full_reductions<ColMajor>()); - CALL_SUBTEST_2(test_full_reductions<RowMajor>()); + CALL_SUBTEST_1((test_full_reductions<float, ColMajor>())); + CALL_SUBTEST_1((test_full_reductions<double, ColMajor>())); + CALL_SUBTEST_2((test_full_reductions<float, RowMajor>())); + CALL_SUBTEST_2((test_full_reductions<double, RowMajor>())); + + CALL_SUBTEST_3((test_first_dim_reductions<float, ColMajor>())); + CALL_SUBTEST_3((test_first_dim_reductions<double, ColMajor>())); + CALL_SUBTEST_4((test_first_dim_reductions<float, RowMajor>())); +// Outer reductions of doubles aren't supported just yet. +// CALL_SUBTEST_4((test_first_dim_reductions<double, RowMajor>())) + + CALL_SUBTEST_5((test_last_dim_reductions<float, ColMajor>())); +// Outer reductions of doubles aren't supported just yet. +// CALL_SUBTEST_5((test_last_dim_reductions<double, ColMajor>())); + CALL_SUBTEST_6((test_last_dim_reductions<float, RowMajor>())); + CALL_SUBTEST_6((test_last_dim_reductions<double, RowMajor>())); } diff --git a/unsupported/test/cxx11_tensor_scan_cuda.cu b/unsupported/test/cxx11_tensor_scan_cuda.cu index 35e19e51c..761d11fd1 100644 --- a/unsupported/test/cxx11_tensor_scan_cuda.cu +++ b/unsupported/test/cxx11_tensor_scan_cuda.cu @@ -13,7 +13,9 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU - +#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500 +#include <cuda_fp16.h> +#endif #include "main.h" #include <unsupported/Eigen/CXX11/Tensor> diff --git a/unsupported/test/kronecker_product.cpp b/unsupported/test/kronecker_product.cpp index 02411a262..e770049e5 100644 --- a/unsupported/test/kronecker_product.cpp +++ b/unsupported/test/kronecker_product.cpp @@ -9,12 +9,12 @@ // Public License v. 2.0. 
If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. +#ifdef EIGEN_TEST_PART_1 #include "sparse.h" #include <Eigen/SparseExtra> #include <Eigen/KroneckerProduct> - template<typename MatrixType> void check_dimension(const MatrixType& ab, const int rows, const int cols) { @@ -230,3 +230,23 @@ void test_kronecker_product() VERIFY_IS_APPROX(MatrixXf(sC2),dC); } } + +#endif + +#ifdef EIGEN_TEST_PART_2 + +// simply check that for a dense kronecker product, sparse module is not needed + +#include "main.h" +#include <Eigen/KroneckerProduct> + +void test_kronecker_product() +{ + MatrixXd a(2,2), b(3,3), c; + a.setRandom(); + b.setRandom(); + c = kroneckerProduct(a,b); + VERIFY_IS_APPROX(c.block(3,3,3,3), a(1,1)*b); +} + +#endif |