path: root/unsupported
author    Luke Iwanski <luke@codeplay.com>    2016-09-19 14:03:54 +0100
committer Luke Iwanski <luke@codeplay.com>    2016-09-19 14:03:54 +0100
commit    b91e0211727b9ea5d7c30908ed86afc4e50d4c6c (patch)
tree      4dd857d1aabdd4065e8b7dec1369a4bd06bac5e7 /unsupported
parent    cb81975714a96ecb2faf33ca242feeee3543b1db (diff)
parent    ff47717f25aeede4878f65b214cdce264b8314e8 (diff)
Merged with default.
Diffstat (limited to 'unsupported')
-rw-r--r--  unsupported/Eigen/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/CXX11/CMakeLists.txt | 2
-rw-r--r--  unsupported/Eigen/CXX11/src/CMakeLists.txt | 4
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 6
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 9
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h | 24
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h | 9
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h | 114
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | 23
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 79
-rw-r--r--  unsupported/Eigen/CXX11/src/TensorSymmetry/CMakeLists.txt | 8
-rw-r--r--  unsupported/Eigen/CXX11/src/TensorSymmetry/util/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/CXX11/src/ThreadPool/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h | 4
-rw-r--r--  unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h | 4
-rw-r--r--  unsupported/Eigen/CXX11/src/util/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/CXX11/src/util/MaxSizeVector.h | 11
-rw-r--r--  unsupported/Eigen/EulerAngles | 43
-rw-r--r--  unsupported/Eigen/KroneckerProduct | 2
-rw-r--r--  unsupported/Eigen/src/AutoDiff/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/src/BVH/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/src/CMakeLists.txt | 16
-rw-r--r--  unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h | 14
-rw-r--r--  unsupported/Eigen/src/Eigenvalues/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/src/EulerAngles/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/src/EulerAngles/EulerAngles.h | 386
-rw-r--r--  unsupported/Eigen/src/EulerAngles/EulerSystem.h | 316
-rw-r--r--  unsupported/Eigen/src/FFT/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/src/IterativeSolvers/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/src/KroneckerProduct/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/src/LevenbergMarquardt/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/src/MatrixFunctions/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/src/MoreVectorization/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/src/NonLinearOptimization/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/src/NumericalDiff/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/src/Polynomials/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/src/Skyline/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/src/SparseExtra/CMakeLists.txt | 6
-rw-r--r--  unsupported/Eigen/src/SpecialFunctions/CMakeLists.txt | 11
-rw-r--r--  unsupported/Eigen/src/Splines/CMakeLists.txt | 6
-rw-r--r--  unsupported/doc/examples/EulerAngles.cpp | 46
-rw-r--r--  unsupported/test/CMakeLists.txt | 15
-rw-r--r--  unsupported/test/EulerAngles.cpp | 208
-rw-r--r--  unsupported/test/cxx11_eventcount.cpp | 6
-rw-r--r--  unsupported/test/cxx11_tensor_argmax_cuda.cu | 3
-rw-r--r--  unsupported/test/cxx11_tensor_cast_float16_cuda.cu | 4
-rw-r--r--  unsupported/test/cxx11_tensor_complex_cuda.cu | 78
-rw-r--r--  unsupported/test/cxx11_tensor_contract_cuda.cu | 4
-rw-r--r--  unsupported/test/cxx11_tensor_contraction.cpp | 23
-rw-r--r--  unsupported/test/cxx11_tensor_cuda.cu | 105
-rw-r--r--  unsupported/test/cxx11_tensor_device.cu | 4
-rw-r--r--  unsupported/test/cxx11_tensor_of_float16_cuda.cu | 56
-rw-r--r--  unsupported/test/cxx11_tensor_random_cuda.cu | 3
-rw-r--r--  unsupported/test/cxx11_tensor_reduction_cuda.cu | 122
-rw-r--r--  unsupported/test/cxx11_tensor_scan_cuda.cu | 4
-rw-r--r--  unsupported/test/kronecker_product.cpp | 22
57 files changed, 1575 insertions(+), 339 deletions(-)
diff --git a/unsupported/Eigen/CMakeLists.txt b/unsupported/Eigen/CMakeLists.txt
index 7478b6b0d..631a06014 100644
--- a/unsupported/Eigen/CMakeLists.txt
+++ b/unsupported/Eigen/CMakeLists.txt
@@ -4,6 +4,7 @@ set(Eigen_HEADERS
ArpackSupport
AutoDiff
BVH
+ EulerAngles
FFT
IterativeSolvers
KroneckerProduct
@@ -26,5 +27,6 @@ install(FILES
DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen COMPONENT Devel
)
-add_subdirectory(src)
-add_subdirectory(CXX11)
\ No newline at end of file
+install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen COMPONENT Devel FILES_MATCHING PATTERN "*.h")
+
+add_subdirectory(CXX11)
diff --git a/unsupported/Eigen/CXX11/CMakeLists.txt b/unsupported/Eigen/CXX11/CMakeLists.txt
index a40bc4715..385ed240c 100644
--- a/unsupported/Eigen/CXX11/CMakeLists.txt
+++ b/unsupported/Eigen/CXX11/CMakeLists.txt
@@ -5,4 +5,4 @@ install(FILES
DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11 COMPONENT Devel
)
-add_subdirectory(src)
+install(DIRECTORY src DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11 COMPONENT Devel FILES_MATCHING PATTERN "*.h")
diff --git a/unsupported/Eigen/CXX11/src/CMakeLists.txt b/unsupported/Eigen/CXX11/src/CMakeLists.txt
deleted file mode 100644
index 1734262bb..000000000
--- a/unsupported/Eigen/CXX11/src/CMakeLists.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-add_subdirectory(util)
-add_subdirectory(ThreadPool)
-add_subdirectory(Tensor)
-add_subdirectory(TensorSymmetry)
diff --git a/unsupported/Eigen/CXX11/src/Tensor/CMakeLists.txt b/unsupported/Eigen/CXX11/src/Tensor/CMakeLists.txt
deleted file mode 100644
index 6d4b3ea0d..000000000
--- a/unsupported/Eigen/CXX11/src/Tensor/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_CXX11_Tensor_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_CXX11_Tensor_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/Tensor COMPONENT Devel
- )
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index e3880d2e0..3c8710255 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -192,6 +192,12 @@ class TensorBase<Derived, ReadOnlyAccessors>
}
EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_log1p_op<Scalar>, const Derived>
+ log1p() const {
+ return unaryExpr(internal::scalar_log1p_op<Scalar>());
+ }
+
+ EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_abs_op<Scalar>, const Derived>
abs() const {
return unaryExpr(internal::scalar_abs_op<Scalar>());
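
The hunk above adds a coefficient-wise log1p() to TensorBase, useful for an accurate log(1 + x) when x is tiny. A minimal sketch of its use (tensor shape and values are illustrative, not from the patch):

    #include <unsupported/Eigen/CXX11/Tensor>

    void log1p_demo() {
      Eigen::Tensor<float, 2> t(2, 2);
      t.setValues({{1e-8f, 0.5f}, {1.0f, 2.0f}});
      // Applies internal::scalar_log1p_op to every coefficient.
      Eigen::Tensor<float, 2> u = t.log1p();
    }
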
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index 56d9c2025..20b29e5fd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -25,8 +25,9 @@ template<typename Dimensions, typename LhsXprType, typename RhsXprType>
struct traits<TensorContractionOp<Dimensions, LhsXprType, RhsXprType> >
{
// Type promotion to handle the case where the types of the lhs and the rhs are different.
- typedef typename internal::promote_storage_type<typename LhsXprType::Scalar,
- typename RhsXprType::Scalar>::ret Scalar;
+ typedef typename gebp_traits<typename remove_const<typename LhsXprType::Scalar>::type,
+ typename remove_const<typename RhsXprType::Scalar>::type>::ResScalar Scalar;
+
typedef typename promote_storage_type<typename traits<LhsXprType>::StorageKind,
typename traits<RhsXprType>::StorageKind>::ret StorageKind;
typedef typename promote_index_type<typename traits<LhsXprType>::Index,
@@ -75,8 +76,8 @@ class TensorContractionOp : public TensorBase<TensorContractionOp<Indices, LhsXp
{
public:
typedef typename Eigen::internal::traits<TensorContractionOp>::Scalar Scalar;
- typedef typename internal::promote_storage_type<typename LhsXprType::CoeffReturnType,
- typename RhsXprType::CoeffReturnType>::ret CoeffReturnType;
+ typedef typename internal::gebp_traits<typename LhsXprType::CoeffReturnType,
+ typename RhsXprType::CoeffReturnType>::ResScalar CoeffReturnType;
typedef typename Eigen::internal::nested<TensorContractionOp>::type Nested;
typedef typename Eigen::internal::traits<TensorContractionOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorContractionOp>::Index Index;
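
The traits change above takes the contraction's result scalar from gebp_traits instead of promote_storage_type, so mixed lhs/rhs scalar types resolve to the same result type the matrix kernels use. For reference, a plain same-type contraction with the standard Tensor API (shapes illustrative):

    #include <unsupported/Eigen/CXX11/Tensor>

    void contract_demo() {
      Eigen::Tensor<float, 2> a(3, 4), b(4, 5);
      a.setRandom();
      b.setRandom();
      // Contract a's dimension 1 with b's dimension 0: a 3x5 result,
      // i.e. the equivalent of a matrix product.
      Eigen::array<Eigen::IndexPair<int>, 1> dims = { Eigen::IndexPair<int>(1, 0) };
      Eigen::Tensor<float, 2> c = a.contract(b, dims);
    }
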
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h b/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h
index a76c8ca35..d66e45d50 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorCostModel.h
@@ -91,21 +91,21 @@ class TensorOpCost {
}
// TODO(rmlarsen): Define min in terms of total cost, not elementwise.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& cwiseMin(
- const TensorOpCost& rhs) {
- bytes_loaded_ = numext::mini(bytes_loaded_, rhs.bytes_loaded());
- bytes_stored_ = numext::mini(bytes_stored_, rhs.bytes_stored());
- compute_cycles_ = numext::mini(compute_cycles_, rhs.compute_cycles());
- return *this;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMin(
+ const TensorOpCost& rhs) const {
+ double bytes_loaded = numext::mini(bytes_loaded_, rhs.bytes_loaded());
+ double bytes_stored = numext::mini(bytes_stored_, rhs.bytes_stored());
+ double compute_cycles = numext::mini(compute_cycles_, rhs.compute_cycles());
+ return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles);
}
// TODO(rmlarsen): Define max in terms of total cost, not elementwise.
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& cwiseMax(
- const TensorOpCost& rhs) {
- bytes_loaded_ = numext::maxi(bytes_loaded_, rhs.bytes_loaded());
- bytes_stored_ = numext::maxi(bytes_stored_, rhs.bytes_stored());
- compute_cycles_ = numext::maxi(compute_cycles_, rhs.compute_cycles());
- return *this;
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost cwiseMax(
+ const TensorOpCost& rhs) const {
+ double bytes_loaded = numext::maxi(bytes_loaded_, rhs.bytes_loaded());
+ double bytes_stored = numext::maxi(bytes_stored_, rhs.bytes_stored());
+ double compute_cycles = numext::maxi(compute_cycles_, rhs.compute_cycles());
+ return TensorOpCost(bytes_loaded, bytes_stored, compute_cycles);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost& operator+=(
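
cwiseMin() and cwiseMax() above change from mutating members returning *this to const members returning a fresh TensorOpCost, which is what lets the TensorSelectOp evaluator below chain costPerCoeff(...).cwiseMax(...) on temporaries. The new call pattern in a sketch (values illustrative):

    #include <unsupported/Eigen/CXX11/Tensor>

    void cost_demo() {
      // Both operands stay unchanged; a new cost object is returned.
      Eigen::TensorOpCost a(/*bytes_loaded=*/64, /*bytes_stored=*/8,  /*compute_cycles=*/100);
      Eigen::TensorOpCost b(/*bytes_loaded=*/32, /*bytes_stored=*/16, /*compute_cycles=*/50);
      Eigen::TensorOpCost lower = a.cwiseMin(b);  // (32, 8, 50), elementwise
      Eigen::TensorOpCost upper = a.cwiseMax(b);  // (64, 16, 100), elementwise
    }
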
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index b2b4bcf62..834ce07df 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -239,7 +239,7 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
EIGEN_DEVICE_FUNC
TensorEvaluator(const XprType& op, const Device& device)
- : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device)
+ : m_functor(op.functor()), m_argImpl(op.nestedExpression(), device), m_wrapper()
{ }
typedef typename XprType::Index Index;
@@ -256,13 +256,13 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
{
- return m_functor(index);
+ return m_wrapper(m_functor, index);
}
template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
- return m_functor.template packetOp<Index, PacketReturnType>(index);
+ return m_wrapper.template packetOp<PacketReturnType, Index>(m_functor, index);
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
@@ -282,6 +282,7 @@ struct TensorEvaluator<const TensorCwiseNullaryOp<NullaryOp, ArgType>, Device>
private:
const NullaryOp m_functor;
TensorEvaluator<ArgType, Device> m_argImpl;
+ const internal::nullary_wrapper<CoeffReturnType,NullaryOp> m_wrapper;
};
@@ -612,7 +613,7 @@ struct TensorEvaluator<const TensorSelectOp<IfArgType, ThenArgType, ElseArgType>
.cwiseMax(m_elseImpl.costPerCoeff(vectorized));
}
- EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType* data() const { return NULL; }
/// required by sycl in order to extract the accessor
const TensorEvaluator<IfArgType, Device> & cond_impl() const { return m_condImpl; }
/// required by sycl in order to extract the accessor
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
index a8e48fced..fc75dbb5c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
@@ -25,7 +25,7 @@ struct scalar_mod_op {
};
template <typename Scalar>
struct functor_traits<scalar_mod_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; };
+{ enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; };
/** \internal
@@ -38,7 +38,7 @@ struct scalar_mod2_op {
};
template <typename Scalar>
struct functor_traits<scalar_mod2_op<Scalar> >
-{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; };
+{ enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; };
template <typename Scalar>
struct scalar_fmod_op {
@@ -188,6 +188,32 @@ struct reducer_traits<MeanReducer<T>, Device> {
};
+template <typename T, bool IsMax = true, bool IsInteger = true>
+struct MinMaxBottomValue {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static T bottom_value() {
+ return Eigen::NumTraits<T>::lowest();
+ }
+};
+template <typename T>
+struct MinMaxBottomValue<T, true, false> {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static T bottom_value() {
+ return -Eigen::NumTraits<T>::infinity();
+ }
+};
+template <typename T>
+struct MinMaxBottomValue<T, false, true> {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static T bottom_value() {
+ return Eigen::NumTraits<T>::highest();
+ }
+};
+template <typename T>
+struct MinMaxBottomValue<T, false, false> {
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static T bottom_value() {
+ return Eigen::NumTraits<T>::infinity();
+ }
+};
+
+
template <typename T> struct MaxReducer
{
static const bool PacketAccess = packet_traits<T>::HasMax;
@@ -200,9 +226,8 @@ template <typename T> struct MaxReducer
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
(*accum) = pmax<Packet>(*accum, p);
}
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
- return Eigen::NumTraits<T>::lowest();
+ return MinMaxBottomValue<T, true, Eigen::NumTraits<T>::IsInteger>::bottom_value();
}
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
@@ -242,9 +267,8 @@ template <typename T> struct MinReducer
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const {
(*accum) = pmin<Packet>(*accum, p);
}
-
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
- return Eigen::NumTraits<T>::highest();
+ return MinMaxBottomValue<T, false, Eigen::NumTraits<T>::IsInteger>::bottom_value();
}
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
@@ -454,12 +478,11 @@ template <typename T> class UniformRandomGenerator {
m_deterministic = other.m_deterministic;
}
- template<typename Index>
- T operator()(Index) const {
+ T operator()() const {
return random<T>();
}
- template<typename Index, typename PacketType>
- PacketType packetOp(Index) const {
+ template<typename PacketType>
+ PacketType packetOp() const {
const int packetSize = internal::unpacket_traits<PacketType>::size;
EIGEN_ALIGN_MAX T values[packetSize];
for (int i = 0; i < packetSize; ++i) {
@@ -484,23 +507,22 @@ template <> class UniformRandomGenerator<float> {
}
UniformRandomGenerator(const UniformRandomGenerator<float>& other) {
m_generator = new std::mt19937();
- m_generator->seed(other(0) * UINT_MAX);
+ m_generator->seed(other() * UINT_MAX);
m_deterministic = other.m_deterministic;
}
~UniformRandomGenerator() {
delete m_generator;
}
- template<typename Index>
- float operator()(Index) const {
+ float operator()() const {
return m_distribution(*m_generator);
}
- template<typename Index, typename PacketType>
- PacketType packetOp(Index i) const {
+ template<typename PacketType>
+ PacketType packetOp() const {
const int packetSize = internal::unpacket_traits<PacketType>::size;
EIGEN_ALIGN_MAX float values[packetSize];
for (int k = 0; k < packetSize; ++k) {
- values[k] = this->operator()(i);
+ values[k] = this->operator()();
}
return internal::pload<PacketType>(values);
}
@@ -525,23 +547,22 @@ template <> class UniformRandomGenerator<double> {
}
UniformRandomGenerator(const UniformRandomGenerator<double>& other) {
m_generator = new std::mt19937();
- m_generator->seed(other(0) * UINT_MAX);
+ m_generator->seed(other() * UINT_MAX);
m_deterministic = other.m_deterministic;
}
~UniformRandomGenerator() {
delete m_generator;
}
- template<typename Index>
- double operator()(Index) const {
+ double operator()() const {
return m_distribution(*m_generator);
}
- template<typename Index, typename PacketType>
- PacketType packetOp(Index i) const {
+ template<typename PacketType>
+ PacketType packetOp() const {
const int packetSize = internal::unpacket_traits<PacketType>::size;
EIGEN_ALIGN_MAX double values[packetSize];
for (int k = 0; k < packetSize; ++k) {
- values[k] = this->operator()(i);
+ values[k] = this->operator()();
}
return internal::pload<PacketType>(values);
}
@@ -578,12 +599,11 @@ template <> class UniformRandomGenerator<float> {
curand_init(seed, tid, 0, &m_state);
}
- template<typename Index>
- __device__ float operator()(Index) const {
+ __device__ float operator()() const {
return curand_uniform(&m_state);
}
- template<typename Index, typename PacketType>
- __device__ float4 packetOp(Index) const {
+ template<typename PacketType>
+ __device__ float4 packetOp() const {
EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
return curand_uniform4(&m_state);
}
@@ -608,12 +628,11 @@ template <> class UniformRandomGenerator<double> {
const int seed = m_deterministic ? 0 : get_random_seed();
curand_init(seed, tid, 0, &m_state);
}
- template<typename Index>
- __device__ double operator()(Index) const {
+ __device__ double operator()() const {
return curand_uniform_double(&m_state);
}
- template<typename Index, typename PacketType>
- __device__ double2 packetOp(Index) const {
+ template<typename PacketType>
+ __device__ double2 packetOp() const {
EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
return curand_uniform2_double(&m_state);
}
@@ -638,8 +657,7 @@ template <> class UniformRandomGenerator<std::complex<float> > {
const int seed = m_deterministic ? 0 : get_random_seed();
curand_init(seed, tid, 0, &m_state);
}
- template<typename Index>
- __device__ std::complex<float> operator()(Index) const {
+ __device__ std::complex<float> operator()() const {
float4 vals = curand_uniform4(&m_state);
return std::complex<float>(vals.x, vals.y);
}
@@ -664,8 +682,7 @@ template <> class UniformRandomGenerator<std::complex<double> > {
const int seed = m_deterministic ? 0 : get_random_seed();
curand_init(seed, tid, 0, &m_state);
}
- template<typename Index>
- __device__ std::complex<double> operator()(Index) const {
+ __device__ std::complex<double> operator()() const {
double2 vals = curand_uniform2_double(&m_state);
return std::complex<double>(vals.x, vals.y);
}
@@ -701,17 +718,16 @@ template <typename T> class NormalRandomGenerator {
}
NormalRandomGenerator(const NormalRandomGenerator& other)
: m_deterministic(other.m_deterministic), m_distribution(other.m_distribution), m_generator(new std::mt19937()) {
- m_generator->seed(other(0) * UINT_MAX);
+ m_generator->seed(other() * UINT_MAX);
}
~NormalRandomGenerator() {
delete m_generator;
}
- template<typename Index>
- T operator()(Index) const {
+ T operator()() const {
return m_distribution(*m_generator);
}
- template<typename Index, typename PacketType>
- PacketType packetOp(Index) const {
+ template<typename PacketType>
+ PacketType packetOp() const {
const int packetSize = internal::unpacket_traits<PacketType>::size;
EIGEN_ALIGN_MAX T values[packetSize];
for (int i = 0; i < packetSize; ++i) {
@@ -749,12 +765,11 @@ template <> class NormalRandomGenerator<float> {
const int seed = m_deterministic ? 0 : get_random_seed();
curand_init(seed, tid, 0, &m_state);
}
- template<typename Index>
- __device__ float operator()(Index) const {
+ __device__ float operator()() const {
return curand_normal(&m_state);
}
- template<typename Index, typename PacketType>
- __device__ float4 packetOp(Index) const {
+ template<typename PacketType>
+ __device__ float4 packetOp() const {
EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
return curand_normal4(&m_state);
}
@@ -779,12 +794,11 @@ template <> class NormalRandomGenerator<double> {
const int seed = m_deterministic ? 0 : get_random_seed();
curand_init(seed, tid, 0, &m_state);
}
- template<typename Index>
- __device__ double operator()(Index) const {
+ __device__ double operator()() const {
return curand_normal_double(&m_state);
}
- template<typename Index, typename PacketType>
- __device__ double2 packetOp(Index) const {
+ template<typename PacketType>
+ __device__ double2 packetOp() const {
EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
return curand_normal2_double(&m_state);
}
@@ -809,8 +823,7 @@ template <> class NormalRandomGenerator<std::complex<float> > {
const int seed = m_deterministic ? 0 : get_random_seed();
curand_init(seed, tid, 0, &m_state);
}
- template<typename Index>
- __device__ std::complex<float> operator()(Index) const {
+ __device__ std::complex<float> operator()() const {
float4 vals = curand_normal4(&m_state);
return std::complex<float>(vals.x, vals.y);
}
@@ -835,8 +848,7 @@ template <> class NormalRandomGenerator<std::complex<double> > {
const int seed = m_deterministic ? 0 : get_random_seed();
curand_init(seed, tid, 0, &m_state);
}
- template<typename Index>
- __device__ std::complex<double> operator()(Index) const {
+ __device__ std::complex<double> operator()() const {
double2 vals = curand_normal2_double(&m_state);
return std::complex<double>(vals.x, vals.y);
}
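
Throughout the file above, the random generators drop their unused Index parameter from operator() and packetOp(), matching the new nullary_wrapper-based dispatch in TensorEvaluator.h. A sketch of the resulting call pattern (UniformRandomGenerator is internal API and its default constructor is an assumption here; user code normally goes through setRandom()):

    #include <unsupported/Eigen/CXX11/Tensor>

    void random_demo() {
      // The generator is now invoked with no index argument.
      Eigen::internal::UniformRandomGenerator<float> gen;
      float x = gen();  // was: gen(i)
      (void)x;

      // Typical user-level entry point, unchanged by this patch:
      Eigen::Tensor<float, 2> t(3, 3);
      t.setRandom();
    }
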
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 9df697e4c..a87777b22 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -505,9 +505,14 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
(reducing_inner_dims || ReducingInnerMostDims)) {
const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
- if (!data && num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) {
- data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
- m_result = data;
+ if (!data) {
+ if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) {
+ data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
+ m_result = data;
+ }
+ else {
+ return true;
+ }
}
Op reducer(m_reducer);
if (internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) {
@@ -533,9 +538,14 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
preserving_inner_dims) {
const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
- if (!data && num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) {
- data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
- m_result = data;
+ if (!data) {
+ if (num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) {
+ data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
+ m_result = data;
+ }
+ else {
+ return true;
+ }
}
Op reducer(m_reducer);
if (internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) {
@@ -556,6 +566,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
m_impl.cleanup();
if (m_result) {
m_device.deallocate(m_result);
+ m_result = NULL;
}
}
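
The cleanup() change above nulls m_result right after deallocation, making a second cleanup() a harmless no-op instead of a double free. The same pattern in isolation (a sketch; std::free stands in for m_device.deallocate):

    #include <cstdlib>

    struct ScratchOwner {
      float* m_result = nullptr;
      void cleanup() {
        if (m_result) {
          std::free(m_result);
          m_result = nullptr;  // a repeated cleanup() now does nothing
        }
      }
    };
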
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index 5e512490c..65638b6a8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -67,11 +67,21 @@ __device__ EIGEN_ALWAYS_INLINE void atomicReduce(T* output, T accum, R& reducer)
#endif
}
+// We extend atomicExch to support extra data types
+template <typename Type>
+__device__ inline Type atomicExchCustom(Type* address, Type val) {
+ return atomicExch(address, val);
+}
+
+template <>
+__device__ inline double atomicExchCustom(double* address, double val) {
+ unsigned long long int* address_as_ull = reinterpret_cast<unsigned long long int*>(address);
+ return __longlong_as_double(atomicExch(address_as_ull, __double_as_longlong(val)));
+}
#ifdef EIGEN_HAS_CUDA_FP16
template <template <typename T> class R>
__device__ inline void atomicReduce(half2* output, half2 accum, R<half>& reducer) {
-#if __CUDA_ARCH__ >= 300
unsigned int oldval = *reinterpret_cast<unsigned int*>(output);
unsigned int newval = oldval;
reducer.reducePacket(accum, reinterpret_cast<half2*>(&newval));
@@ -87,9 +97,6 @@ __device__ inline void atomicReduce(half2* output, half2 accum, R<half>& reducer
return;
}
}
-#else
- assert(0 && "Shouldn't be called on unsupported device");
-#endif
}
#endif
@@ -130,7 +137,7 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num
unsigned int block = atomicCAS(semaphore, 0u, 1u);
if (block == 0) {
// We're the first block to run, initialize the output value
- atomicExch(output, reducer.initialize());
+ atomicExchCustom(output, reducer.initialize());
__threadfence();
atomicExch(semaphore, 2u);
}
@@ -263,17 +270,22 @@ __global__ void ReductionCleanupKernelHalfFloat(Op& reducer, half* output, half2
#endif
-
-template <typename Self, typename Op, typename OutputType, bool PacketAccess>
+template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void>
struct FullReductionLauncher {
static void run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index) {
- assert(false && "Should only be called on floats and half floats");
+ assert(false && "Should only be called on doubles, floats and half floats");
}
};
-template <typename Self, typename Op, bool PacketAccess>
-struct FullReductionLauncher<Self, Op, float, PacketAccess> {
- static void run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs) {
+// Specialization for float and double
+template <typename Self, typename Op, typename OutputType, bool PacketAccess>
+struct FullReductionLauncher<
+ Self, Op, OutputType, PacketAccess,
+ typename internal::enable_if<
+ internal::is_same<float, OutputType>::value ||
+ internal::is_same<double, OutputType>::value,
+ void>::type> {
+ static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs) {
typedef typename Self::Index Index;
typedef typename Self::CoeffReturnType Scalar;
const int block_size = 256;
@@ -330,20 +342,22 @@ struct FullReductionLauncher<Self, Op, Eigen::half, true> {
template <typename Self, typename Op, bool Vectorizable>
struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
// Unfortunately nvidia doesn't support well exotic types such as complex,
- // so reduce the scope of the optimized version of the code to the simple case
- // of floats and half floats.
+ // so reduce the scope of the optimized version of the code to the simple cases
+ // of doubles, floats and half floats
#ifdef EIGEN_HAS_CUDA_FP16
static const bool HasOptimizedImplementation = !Op::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
+ internal::is_same<typename Self::CoeffReturnType, double>::value ||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
#else
static const bool HasOptimizedImplementation = !Op::IsStateful &&
- internal::is_same<typename Self::CoeffReturnType, float>::value;
+ (internal::is_same<typename Self::CoeffReturnType, float>::value ||
+ internal::is_same<typename Self::CoeffReturnType, double>::value);
#endif
template <typename OutputType>
static void run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output) {
- assert(HasOptimizedImplementation && "Should only be called on floats or half floats");
+ assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats");
const Index num_coeffs = array_prod(self.m_impl.dimensions());
// Don't crash when we're called with an input tensor of size 0.
if (num_coeffs == 0) {
@@ -360,6 +374,7 @@ template <int NumPerThread, typename Self,
__global__ void InnerReductionKernel(Reducer reducer, const Self input, Index num_coeffs_to_reduce, Index num_preserved_coeffs,
typename Self::CoeffReturnType* output) {
#if __CUDA_ARCH__ >= 300
+ typedef typename Self::CoeffReturnType Type;
eigen_assert(blockDim.y == 1);
eigen_assert(blockDim.z == 1);
eigen_assert(gridDim.y == 1);
@@ -389,13 +404,13 @@ __global__ void InnerReductionKernel(Reducer reducer, const Self input, Index nu
const Index col_block = i % input_col_blocks;
const Index col_begin = col_block * blockDim.x * NumPerThread + threadIdx.x;
- float reduced_val = reducer.initialize();
+ Type reduced_val = reducer.initialize();
for (Index j = 0; j < NumPerThread; j += unroll_times) {
const Index last_col = col_begin + blockDim.x * (j + unroll_times - 1);
if (last_col >= num_coeffs_to_reduce) {
for (Index col = col_begin + blockDim.x * j; col < num_coeffs_to_reduce; col += blockDim.x) {
- const float val = input.m_impl.coeff(row * num_coeffs_to_reduce + col);
+ const Type val = input.m_impl.coeff(row * num_coeffs_to_reduce + col);
reducer.reduce(val, &reduced_val);
}
break;
@@ -521,17 +536,23 @@ __global__ void InnerReductionKernelHalfFloat(Reducer reducer, const Self input,
#endif
-template <typename Self, typename Op, typename OutputType, bool PacketAccess>
+template <typename Self, typename Op, typename OutputType, bool PacketAccess, typename Enabled = void>
struct InnerReductionLauncher {
static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const GpuDevice&, OutputType*, typename Self::Index, typename Self::Index) {
- assert(false && "Should only be called to reduce floats and half floats on a gpu device");
+ assert(false && "Should only be called to reduce doubles, floats and half floats on a gpu device");
return true;
}
};
-template <typename Self, typename Op, bool PacketAccess>
-struct InnerReductionLauncher<Self, Op, float, PacketAccess> {
- static bool run(const Self& self, Op& reducer, const GpuDevice& device, float* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
+// Specialization for float and double
+template <typename Self, typename Op, typename OutputType, bool PacketAccess>
+struct InnerReductionLauncher<
+ Self, Op, OutputType, PacketAccess,
+ typename internal::enable_if<
+ internal::is_same<float, OutputType>::value ||
+ internal::is_same<double, OutputType>::value,
+ void>::type> {
+ static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
typedef typename Self::Index Index;
const Index num_coeffs = num_coeffs_to_reduce * num_preserved_vals;
@@ -549,7 +570,7 @@ struct InnerReductionLauncher<Self, Op, float, PacketAccess> {
const int max_blocks = device.getNumCudaMultiProcessors() *
device.maxCudaThreadsPerMultiProcessor() / 1024;
const int num_blocks = numext::mini<int>(max_blocks, dyn_blocks);
- LAUNCH_CUDA_KERNEL((ReductionInitKernel<float, Index>),
+ LAUNCH_CUDA_KERNEL((ReductionInitKernel<OutputType, Index>),
num_blocks, 1024, 0, device, reducer.initialize(),
num_preserved_vals, output);
}
@@ -616,15 +637,17 @@ struct InnerReducer<Self, Op, GpuDevice> {
#ifdef EIGEN_HAS_CUDA_FP16
static const bool HasOptimizedImplementation = !Op::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
+ internal::is_same<typename Self::CoeffReturnType, double>::value ||
(internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
#else
static const bool HasOptimizedImplementation = !Op::IsStateful &&
- internal::is_same<typename Self::CoeffReturnType, float>::value;
+ (internal::is_same<typename Self::CoeffReturnType, float>::value ||
+ internal::is_same<typename Self::CoeffReturnType, double>::value);
#endif
template <typename OutputType>
static bool run(const Self& self, Op& reducer, const GpuDevice& device, OutputType* output, typename Self::Index num_coeffs_to_reduce, typename Self::Index num_preserved_vals) {
- assert(HasOptimizedImplementation && "Should only be called on floats or half floats");
+ assert(HasOptimizedImplementation && "Should only be called on doubles, floats or half floats");
const Index num_coeffs = array_prod(self.m_impl.dimensions());
// Don't crash when we're called with an input tensor of size 0.
if (num_coeffs == 0) {
@@ -675,11 +698,11 @@ struct OuterReducer<Self, Op, GpuDevice> {
// so reduce the scope of the optimized version of the code to the simple case
// of floats.
static const bool HasOptimizedImplementation = !Op::IsStateful &&
- internal::is_same<typename Self::CoeffReturnType, float>::value;
-
+ (internal::is_same<typename Self::CoeffReturnType, float>::value ||
+ internal::is_same<typename Self::CoeffReturnType, double>::value);
template <typename Device, typename OutputType>
static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
- assert(false && "Should only be called to reduce floats on a gpu device");
+ assert(false && "Should only be called to reduce doubles or floats on a gpu device");
return true;
}
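
atomicExchCustom above works around the fact that CUDA's atomicExch has no double overload: the double is bit-cast to a 64-bit integer, exchanged atomically, and cast back. The same trick in isolation (CUDA device code; a sketch assuming compilation with nvcc):

    // No double overload of atomicExch exists, so exchange the raw
    // 64-bit pattern and bit-cast it back to double.
    __device__ double exchange_double(double* address, double val) {
      unsigned long long int* as_ull =
          reinterpret_cast<unsigned long long int*>(address);
      return __longlong_as_double(atomicExch(as_ull, __double_as_longlong(val)));
    }
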
diff --git a/unsupported/Eigen/CXX11/src/TensorSymmetry/CMakeLists.txt b/unsupported/Eigen/CXX11/src/TensorSymmetry/CMakeLists.txt
deleted file mode 100644
index 6e871a8da..000000000
--- a/unsupported/Eigen/CXX11/src/TensorSymmetry/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-FILE(GLOB Eigen_CXX11_TensorSymmetry_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_CXX11_TensorSymmetry_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/TensorSymmetry COMPONENT Devel
- )
-
-add_subdirectory(util)
diff --git a/unsupported/Eigen/CXX11/src/TensorSymmetry/util/CMakeLists.txt b/unsupported/Eigen/CXX11/src/TensorSymmetry/util/CMakeLists.txt
deleted file mode 100644
index dc9fc78ec..000000000
--- a/unsupported/Eigen/CXX11/src/TensorSymmetry/util/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_CXX11_TensorSymmetry_util_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_CXX11_TensorSymmetry_util_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/TensorSymmetry/util COMPONENT Devel
- )
diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/CMakeLists.txt b/unsupported/Eigen/CXX11/src/ThreadPool/CMakeLists.txt
deleted file mode 100644
index 88fef50c6..000000000
--- a/unsupported/Eigen/CXX11/src/ThreadPool/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_CXX11_ThreadPool_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_CXX11_ThreadPool_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/ThreadPool COMPONENT Devel
- )
diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h b/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h
index 12b80d6c4..71d55552d 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/EventCount.h
@@ -50,7 +50,7 @@ class EventCount {
public:
class Waiter;
- EventCount(std::vector<Waiter>& waiters) : waiters_(waiters) {
+ EventCount(MaxSizeVector<Waiter>& waiters) : waiters_(waiters) {
eigen_assert(waiters.size() < (1 << kWaiterBits) - 1);
// Initialize epoch to something close to overflow to test overflow.
state_ = kStackMask | (kEpochMask - kEpochInc * waiters.size() * 2);
@@ -199,7 +199,7 @@ class EventCount {
static const uint64_t kEpochMask = ((1ull << kEpochBits) - 1) << kEpochShift;
static const uint64_t kEpochInc = 1ull << kEpochShift;
std::atomic<uint64_t> state_;
- std::vector<Waiter>& waiters_;
+ MaxSizeVector<Waiter>& waiters_;
void Park(Waiter* w) {
std::unique_lock<std::mutex> lock(w->mu);
diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
index 33ae45131..354bce52a 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
@@ -29,6 +29,8 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface {
spinning_(0),
done_(false),
ec_(waiters_) {
+ waiters_.resize(num_threads);
+
// Calculate coprimes of num_threads.
// Coprimes are used for a random walk over all threads in Steal
// and NonEmptyQueueIndex. Iteration is based on the fact that if we take
@@ -123,7 +125,7 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface {
MaxSizeVector<Thread*> threads_;
MaxSizeVector<Queue*> queues_;
MaxSizeVector<unsigned> coprimes_;
- std::vector<EventCount::Waiter> waiters_;
+ MaxSizeVector<EventCount::Waiter> waiters_;
std::atomic<unsigned> blocked_;
std::atomic<bool> spinning_;
std::atomic<bool> done_;
diff --git a/unsupported/Eigen/CXX11/src/util/CMakeLists.txt b/unsupported/Eigen/CXX11/src/util/CMakeLists.txt
deleted file mode 100644
index 7eab492d6..000000000
--- a/unsupported/Eigen/CXX11/src/util/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_CXX11_util_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_CXX11_util_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/CXX11/src/util COMPONENT Devel
- )
diff --git a/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h b/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h
index 961456f10..4bc3dd1ba 100644
--- a/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h
+++ b/unsupported/Eigen/CXX11/src/util/MaxSizeVector.h
@@ -55,6 +55,17 @@ class MaxSizeVector {
internal::aligned_free(data_);
}
+ void resize(size_t n) {
+ eigen_assert(n <= reserve_);
+ for (size_t i = size_; i < n; ++i) {
+ new (&data_[i]) T;
+ }
+ for (size_t i = n; i < size_; ++i) {
+ data_[i].~T();
+ }
+ size_ = n;
+ }
+
// Append new elements (up to reserved size).
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
void push_back(const T& t) {
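
The new resize() above placement-new default-constructs elements when growing and destroys trailing elements when shrinking, always within the capacity fixed at construction; since the storage never reallocates, element addresses stay stable, which the thread-pool change above relies on while waiters are parked. A usage sketch (sizes illustrative):

    Eigen::MaxSizeVector<int> v(8);  // capacity 8, initial size 0
    v.resize(4);   // placement-new default-constructs 4 elements
    v.resize(2);   // destroys the last 2; capacity stays 8
    // v.resize(16) would trip the eigen_assert(n <= reserve_).
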
diff --git a/unsupported/Eigen/EulerAngles b/unsupported/Eigen/EulerAngles
new file mode 100644
index 000000000..521fa3f76
--- /dev/null
+++ b/unsupported/Eigen/EulerAngles
@@ -0,0 +1,43 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Tal Hadad <tal_hd@hotmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_EULERANGLES_MODULE_H
+#define EIGEN_EULERANGLES_MODULE_H
+
+
+#include "Eigen/Core"
+#include "Eigen/Geometry"
+
+#include "Eigen/src/Core/util/DisableStupidWarnings.h"
+
+namespace Eigen {
+
+/**
+ * \defgroup EulerAngles_Module EulerAngles module
+ * \brief This module provides generic Euler angle rotations.
+ *
+ * Euler angles are a way to represent 3D rotations.
+ *
+ * In order to use this module in your code, include this header:
+ * \code
+ * #include <unsupported/Eigen/EulerAngles>
+ * \endcode
+ *
+ * See \ref EulerAngles for more information.
+ *
+ */
+
+}
+
+#include "src/EulerAngles/EulerSystem.h"
+#include "src/EulerAngles/EulerAngles.h"
+
+#include "Eigen/src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_EULERANGLES_MODULE_H
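
A minimal usage sketch for the new module, using a typedef and the conversions defined in EulerAngles.h below (angle values are illustrative):

    #include <unsupported/Eigen/EulerAngles>
    #include <iostream>

    int main() {
      // Intrinsic Z-Y'-X'' rotation by (alpha, beta, gamma), in radians.
      Eigen::EulerAnglesZYXd e(0.5, -0.2, 0.1);
      Eigen::Quaterniond q = static_cast<Eigen::Quaterniond>(e);  // conversion operator
      Eigen::Matrix3d R = e.toRotationMatrix();
      std::cout << "angles:        " << e << "\n";  // friend operator<<
      std::cout << "as quaternion: " << q.coeffs().transpose() << "\n";
      std::cout << "as matrix:\n" << R << "\n";
      return 0;
    }
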
diff --git a/unsupported/Eigen/KroneckerProduct b/unsupported/Eigen/KroneckerProduct
index c932c06a6..5f5afb8cf 100644
--- a/unsupported/Eigen/KroneckerProduct
+++ b/unsupported/Eigen/KroneckerProduct
@@ -13,6 +13,8 @@
#include "../../Eigen/src/Core/util/DisableStupidWarnings.h"
+#include "../../Eigen/src/SparseCore/SparseUtil.h"
+
namespace Eigen {
/**
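
Including SparseUtil.h directly makes the header self-contained for its sparse specializations. For reference, a dense usage sketch of the module (matrices illustrative):

    #include <Eigen/Dense>
    #include <unsupported/Eigen/KroneckerProduct>

    void kron_demo() {
      Eigen::MatrixXd A = Eigen::MatrixXd::Identity(2, 2);
      Eigen::MatrixXd B(2, 2);
      B << 1, 2,
           3, 4;
      // 4x4 block matrix: each A(i,j) scales a copy of B.
      Eigen::MatrixXd C = Eigen::kroneckerProduct(A, B);
    }
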
diff --git a/unsupported/Eigen/src/AutoDiff/CMakeLists.txt b/unsupported/Eigen/src/AutoDiff/CMakeLists.txt
deleted file mode 100644
index ad91fd9c4..000000000
--- a/unsupported/Eigen/src/AutoDiff/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_AutoDiff_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_AutoDiff_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/AutoDiff COMPONENT Devel
- )
diff --git a/unsupported/Eigen/src/BVH/CMakeLists.txt b/unsupported/Eigen/src/BVH/CMakeLists.txt
deleted file mode 100644
index b377d865c..000000000
--- a/unsupported/Eigen/src/BVH/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_BVH_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_BVH_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/BVH COMPONENT Devel
- )
diff --git a/unsupported/Eigen/src/CMakeLists.txt b/unsupported/Eigen/src/CMakeLists.txt
deleted file mode 100644
index f42946793..000000000
--- a/unsupported/Eigen/src/CMakeLists.txt
+++ /dev/null
@@ -1,16 +0,0 @@
-ADD_SUBDIRECTORY(AutoDiff)
-ADD_SUBDIRECTORY(BVH)
-ADD_SUBDIRECTORY(Eigenvalues)
-ADD_SUBDIRECTORY(FFT)
-ADD_SUBDIRECTORY(IterativeSolvers)
-ADD_SUBDIRECTORY(LevenbergMarquardt)
-ADD_SUBDIRECTORY(MatrixFunctions)
-ADD_SUBDIRECTORY(MoreVectorization)
-ADD_SUBDIRECTORY(NonLinearOptimization)
-ADD_SUBDIRECTORY(NumericalDiff)
-ADD_SUBDIRECTORY(Polynomials)
-ADD_SUBDIRECTORY(Skyline)
-ADD_SUBDIRECTORY(SparseExtra)
-ADD_SUBDIRECTORY(SpecialFunctions)
-ADD_SUBDIRECTORY(KroneckerProduct)
-ADD_SUBDIRECTORY(Splines)
diff --git a/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h b/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h
index 3b6a69aff..866a8a460 100644
--- a/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h
+++ b/unsupported/Eigen/src/Eigenvalues/ArpackSelfAdjointEigenSolver.h
@@ -628,15 +628,15 @@ ArpackGeneralizedSelfAdjointEigenSolver<MatrixType, MatrixSolver, BisSPD>&
m_info = Success;
}
- delete select;
+ delete[] select;
}
- delete v;
- delete iparam;
- delete ipntr;
- delete workd;
- delete workl;
- delete resid;
+ delete[] v;
+ delete[] iparam;
+ delete[] ipntr;
+ delete[] workd;
+ delete[] workl;
+ delete[] resid;
m_isInitialized = true;
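
The fix above matters because memory obtained with new[] must be released with delete[]; scalar delete on such a pointer is undefined behavior. In isolation (array length illustrative):

    // new[] must pair with delete[].
    int* iparam = new int[11];
    // ... fill and use iparam ...
    delete[] iparam;   // correct
    // delete iparam;  // the bug fixed above: UB for array allocations
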
diff --git a/unsupported/Eigen/src/Eigenvalues/CMakeLists.txt b/unsupported/Eigen/src/Eigenvalues/CMakeLists.txt
deleted file mode 100644
index 1d4387c82..000000000
--- a/unsupported/Eigen/src/Eigenvalues/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Eigenvalues_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_Eigenvalues_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/Eigenvalues COMPONENT Devel
- )
diff --git a/unsupported/Eigen/src/EulerAngles/CMakeLists.txt b/unsupported/Eigen/src/EulerAngles/CMakeLists.txt
new file mode 100644
index 000000000..40af550e8
--- /dev/null
+++ b/unsupported/Eigen/src/EulerAngles/CMakeLists.txt
@@ -0,0 +1,6 @@
+FILE(GLOB Eigen_EulerAngles_SRCS "*.h")
+
+INSTALL(FILES
+ ${Eigen_EulerAngles_SRCS}
+ DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/EulerAngles COMPONENT Devel
+ )
diff --git a/unsupported/Eigen/src/EulerAngles/EulerAngles.h b/unsupported/Eigen/src/EulerAngles/EulerAngles.h
new file mode 100644
index 000000000..13a0da1ab
--- /dev/null
+++ b/unsupported/Eigen/src/EulerAngles/EulerAngles.h
@@ -0,0 +1,386 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Tal Hadad <tal_hd@hotmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_EULERANGLESCLASS_H // TODO: Fix previous "EIGEN_EULERANGLES_H" definition?
+#define EIGEN_EULERANGLESCLASS_H
+
+namespace Eigen
+{
+ /*template<typename Other,
+ int OtherRows=Other::RowsAtCompileTime,
+ int OtherCols=Other::ColsAtCompileTime>
+ struct ei_eulerangles_assign_impl;*/
+
+ /** \class EulerAngles
+ *
+ * \ingroup EulerAngles_Module
+ *
+ * \brief Represents a rotation in a 3-dimensional space as three Euler angles.
+ *
+ * An Euler rotation is a composition of three rotations by three angles about three fixed axes, defined by the EulerSystem given as a template parameter.
+ *
+ * Here is how intrinsic Euler angles work:
+ * - first, rotate the axes system about the alpha axis by angle alpha
+ * - then, rotate the axes system about the beta axis (which was rotated in the first stage) by angle beta
+ * - then, rotate the axes system about the gamma axis (which was rotated in the two stages above) by angle gamma
+ *
+ * \note For simplicity, this class supports only intrinsic Euler angles;
+ * see EulerSystem for how to easily handle extrinsic systems.
+ *
+ * ### Rotation representation and conversions ###
+ *
+ * It has been proved (see the Wikipedia link below) that every rotation can be represented
+ * by Euler angles, although the representation is not unique (unlike, e.g., rotation matrices).
+ * Therefore, you can convert to and from Eigen's other rotation types
+ * (including rotation matrices, which by Eigen's design are not called "rotations").
+ *
+ * Euler angles are typically used for:
+ * - a convenient, human-readable representation of rotation, especially in interactive GUIs.
+ * - gimbal systems and robotics
+ * - efficient encoding (i.e. only 3 floats) of rotations for network protocols.
+ *
+ * However, Euler angles are slow compared to quaternions or matrices,
+ * because their mathematical definition, while simple for humans, is unnatural for computation.
+ * To overcome this, this class provides easy conversion from the math-friendly representation
+ * to the human-friendly representation, and vice versa.
+ *
+ * All the user needs to do is a safe, simple C++ type conversion,
+ * and this class takes care of the math.
+ * Additionally, some axis-related computation is done at compile time.
+ *
+ * #### Euler angles ranges in conversions ####
+ *
+ * When converting a rotation to Euler angles, there are several ways to control
+ * the ranges of the resulting angles.
+ *
+ * #### Implicit ranges ####
+ * When using implicit ranges, all angles are guaranteed to be in the range [-PI, +PI],
+ * unless you convert from another set of Euler angles.
+ * In that case, the range is __undefined__ (it might even be less than -PI or greater than +2*PI).
+ * \sa EulerAngles(const MatrixBase<Derived>&)
+ * \sa EulerAngles(const RotationBase<Derived, 3>&)
+ *
+ * #### Explicit ranges ####
+ * When using explicit ranges, all angles are guaranteed to be in the range you choose.
+ * Each range Boolean parameter asks whether you prefer the positive range or not:
+ * - _true_ - force the range into [0, +2*PI]
+ * - _false_ - force the range into [-PI, +PI]
+ *
+ * ##### Compile-time ranges #####
+ * Use these when the ranges are known at compile time and you prefer
+ * template parameters (e.g. for performance).
+ * \sa FromRotation()
+ *
+ * ##### Run-time ranges #####
+ * Run-time ranges are also supported.
+ * \sa EulerAngles(const MatrixBase<Derived>&, bool, bool, bool)
+ * \sa EulerAngles(const RotationBase<Derived, 3>&, bool, bool, bool)
+ *
+ * ### Convenient user typedefs ###
+ *
+ * Convenient typedefs for EulerAngles exist for float and double scalars,
+ * in the form EulerAngles{A}{B}{C}{scalar},
+ * e.g. \ref EulerAnglesXYZd, \ref EulerAnglesZYZf.
+ *
+ * Convenient typedefs exist only for Euler systems with positive axes {+x,+y,+z}.
+ * If you need negative axes {-x,-y,-z}, it is recommended to create your own typedef with
+ * a name that represents what you need.
+ *
+ * ### Example ###
+ *
+ * \include EulerAngles.cpp
+ * Output: \verbinclude EulerAngles.out
+ *
+ * ### Additional reading ###
+ *
+ * If you want a better idea of how Euler systems work in Eigen, see EulerSystem.
+ *
+ * More information about Euler angles: https://en.wikipedia.org/wiki/Euler_angles
+ *
+ * \tparam _Scalar the scalar type, i.e., the type of the angles.
+ *
+ * \tparam _System the EulerSystem to use, which represents the axes of rotation.
+ */
+ template <typename _Scalar, class _System>
+ class EulerAngles : public RotationBase<EulerAngles<_Scalar, _System>, 3>
+ {
+ public:
+ /** the scalar type of the angles */
+ typedef _Scalar Scalar;
+
+ /** the EulerSystem to use, which represents the axes of rotation. */
+ typedef _System System;
+
+ typedef Matrix<Scalar,3,3> Matrix3; /*!< the equivalent rotation matrix type */
+ typedef Matrix<Scalar,3,1> Vector3; /*!< the equivalent 3-dimensional vector type */
+ typedef Quaternion<Scalar> QuaternionType; /*!< the equivalent quaternion type */
+ typedef AngleAxis<Scalar> AngleAxisType; /*!< the equivalent angle-axis type */
+
+ /** \returns the axis vector of the first (alpha) rotation */
+ static Vector3 AlphaAxisVector() {
+ const Vector3& u = Vector3::Unit(System::AlphaAxisAbs - 1);
+ return System::IsAlphaOpposite ? -u : u;
+ }
+
+ /** \returns the axis vector of the second (beta) rotation */
+ static Vector3 BetaAxisVector() {
+ const Vector3& u = Vector3::Unit(System::BetaAxisAbs - 1);
+ return System::IsBetaOpposite ? -u : u;
+ }
+
+ /** \returns the axis vector of the third (gamma) rotation */
+ static Vector3 GammaAxisVector() {
+ const Vector3& u = Vector3::Unit(System::GammaAxisAbs - 1);
+ return System::IsGammaOpposite ? -u : u;
+ }
+
+ private:
+ Vector3 m_angles;
+
+ public:
+ /** Default constructor without initialization. */
+ EulerAngles() {}
+ /** Constructs and initializes the Euler angles (\p alpha, \p beta, \p gamma). */
+ EulerAngles(const Scalar& alpha, const Scalar& beta, const Scalar& gamma) :
+ m_angles(alpha, beta, gamma) {}
+
+ /** Constructs and initializes Euler angles from a 3x3 rotation matrix \p m.
+ *
+ * \note All angles will be in the range [-PI, PI].
+ */
+ template<typename Derived>
+ EulerAngles(const MatrixBase<Derived>& m) { *this = m; }
+
+ /** Constructs and initializes Euler angles from a 3x3 rotation matrix \p m,
+ * with options to choose the requested range for each angle.
+ *
+ * If positive range is true, then the specified angle will be in the range [0, +2*PI].
+ * Otherwise, the specified angle will be in the range [-PI, +PI].
+ *
+ * \param m The 3x3 rotation matrix to convert
+ * \param positiveRangeAlpha If true, alpha will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
+ * \param positiveRangeBeta If true, beta will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
+ * \param positiveRangeGamma If true, gamma will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
+ */
+ template<typename Derived>
+ EulerAngles(
+ const MatrixBase<Derived>& m,
+ bool positiveRangeAlpha,
+ bool positiveRangeBeta,
+ bool positiveRangeGamma) {
+
+ System::CalcEulerAngles(*this, m, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma);
+ }
+
+ /** Constructs and initializes Euler angles from a rotation \p rot.
+ *
+ * \note All angles will be in the range [-PI, PI], unless \p rot is an EulerAngles.
+ * If \p rot is an EulerAngles, the range of the resulting angles is __undefined__.
+ * (Use the other constructors here to enforce ranges if that effect is desired.)
+ */
+ template<typename Derived>
+ EulerAngles(const RotationBase<Derived, 3>& rot) { *this = rot; }
+
+ /** Constructs and initializes Euler angles from a rotation \p rot,
+ * with options to choose the requested range for each angle.
+ *
+ * If positive range is true, then the specified angle will be in the range [0, +2*PI].
+ * Otherwise, the specified angle will be in the range [-PI, +PI].
+ *
+ * \param rot The rotation to convert
+ * \param positiveRangeAlpha If true, alpha will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
+ * \param positiveRangeBeta If true, beta will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
+ * \param positiveRangeGamma If true, gamma will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
+ */
+ template<typename Derived>
+ EulerAngles(
+ const RotationBase<Derived, 3>& rot,
+ bool positiveRangeAlpha,
+ bool positiveRangeBeta,
+ bool positiveRangeGamma) {
+
+ System::CalcEulerAngles(*this, rot.toRotationMatrix(), positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma);
+ }
+
+ /** \returns The angle values stored in a vector (alpha, beta, gamma). */
+ const Vector3& angles() const { return m_angles; }
+ /** \returns A read-write reference to the angle values stored in a vector (alpha, beta, gamma). */
+ Vector3& angles() { return m_angles; }
+
+ /** \returns The value of the first angle. */
+ Scalar alpha() const { return m_angles[0]; }
+ /** \returns A read-write reference to the first angle. */
+ Scalar& alpha() { return m_angles[0]; }
+
+ /** \returns The value of the second angle. */
+ Scalar beta() const { return m_angles[1]; }
+ /** \returns A read-write reference to the second angle. */
+ Scalar& beta() { return m_angles[1]; }
+
+ /** \returns The value of the third angle. */
+ Scalar gamma() const { return m_angles[2]; }
+ /** \returns A read-write reference to the third angle. */
+ Scalar& gamma() { return m_angles[2]; }
+
+ /** \returns The inverse of the Euler angles rotation (which is the same as the negative),
+ * (-alpha, -beta, -gamma).
+ */
+ EulerAngles inverse() const
+ {
+ EulerAngles res;
+ res.m_angles = -m_angles;
+ return res;
+ }
+
+ /** \returns The negative of the Euler angles rotation (which is the same as the inverse),
+ * (-alpha, -beta, -gamma).
+ */
+ EulerAngles operator -() const
+ {
+ return inverse();
+ }
+
+ /** Constructs and initializes Euler angles from a 3x3 rotation matrix \p m,
+ * with options to choose the requested range for each angle (__at compile time only__).
+ *
+ * If positive range is true, then the specified angle will be in the range [0, +2*PI].
+ * Otherwise, the specified angle will be in the range [-PI, +PI].
+ *
+ * \param m The 3x3 rotation matrix to convert
+ * \tparam PositiveRangeAlpha If true, alpha will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
+ * \tparam PositiveRangeBeta If true, beta will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
+ * \tparam PositiveRangeGamma If true, gamma will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
+ */
+ template<
+ bool PositiveRangeAlpha,
+ bool PositiveRangeBeta,
+ bool PositiveRangeGamma,
+ typename Derived>
+ static EulerAngles FromRotation(const MatrixBase<Derived>& m)
+ {
+ EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Derived, 3, 3)
+
+ EulerAngles e;
+ System::template CalcEulerAngles<
+ PositiveRangeAlpha, PositiveRangeBeta, PositiveRangeGamma, _Scalar>(e, m);
+ return e;
+ }
+
+ /** Constructs and initializes Euler angles from a rotation \p rot,
+ * with options to choose the requested range for each angle (__at compile time only__).
+ *
+ * If positive range is true, then the specified angle will be in the range [0, +2*PI].
+ * Otherwise, the specified angle will be in the range [-PI, +PI].
+ *
+ * \param rot The rotation to convert
+ * \tparam PositiveRangeAlpha If true, alpha will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
+ * \tparam PositiveRangeBeta If true, beta will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
+ * \tparam PositiveRangeGamma If true, gamma will be in [0, 2*PI]. Otherwise, in [-PI, +PI].
+ */
+ template<
+ bool PositiveRangeAlpha,
+ bool PositiveRangeBeta,
+ bool PositiveRangeGamma,
+ typename Derived>
+ static EulerAngles FromRotation(const RotationBase<Derived, 3>& rot)
+ {
+ return FromRotation<PositiveRangeAlpha, PositiveRangeBeta, PositiveRangeGamma>(rot.toRotationMatrix());
+ }
+
+ /*EulerAngles& fromQuaternion(const QuaternionType& q)
+ {
+ // TODO: Implement it in a faster way for quaternions
+ // According to http://www.euclideanspace.com/maths/geometry/rotations/conversions/quaternionToEuler/
+ // we can compute only the needed matrix cells and then convert to euler angles. (see ZYX example below)
+ // Currently we compute all matrix cells from quaternion.
+
+ // Special case only for ZYX
+ //Scalar y2 = q.y() * q.y();
+ //m_angles[0] = std::atan2(2*(q.w()*q.z() + q.x()*q.y()), (1 - 2*(y2 + q.z()*q.z())));
+ //m_angles[1] = std::asin( 2*(q.w()*q.y() - q.z()*q.x()));
+ //m_angles[2] = std::atan2(2*(q.w()*q.x() + q.y()*q.z()), (1 - 2*(q.x()*q.x() + y2)));
+ }*/
+
+ /** Sets \c *this from a rotation matrix (i.e. a pure orthogonal matrix with determinant +1). */
+ template<typename Derived>
+ EulerAngles& operator=(const MatrixBase<Derived>& m) {
+ EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Derived, 3, 3)
+
+ System::CalcEulerAngles(*this, m);
+ return *this;
+ }
+
+ // TODO: Assign and construct from another EulerAngles (with different system)
+
+ /** Sets \c *this from a rotation. */
+ template<typename Derived>
+ EulerAngles& operator=(const RotationBase<Derived, 3>& rot) {
+ System::CalcEulerAngles(*this, rot.toRotationMatrix());
+ return *this;
+ }
+
+ // TODO: Support isApprox function
+
+ /** \returns an equivalent 3x3 rotation matrix. */
+ Matrix3 toRotationMatrix() const
+ {
+ return static_cast<QuaternionType>(*this).toRotationMatrix();
+ }
+
+ /** Converts the Euler angles to a quaternion. */
+ operator QuaternionType() const
+ {
+ return
+ AngleAxisType(alpha(), AlphaAxisVector()) *
+ AngleAxisType(beta(), BetaAxisVector()) *
+ AngleAxisType(gamma(), GammaAxisVector());
+ }
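+
+ // A minimal round-trip sketch (hypothetical values), assuming the
+ // EulerAnglesXYZd typedef defined at the end of this file:
+ //   EulerAnglesXYZd ea(0.1, 0.2, 0.3);
+ //   Matrix3d m = ea.toRotationMatrix();
+ //   Quaterniond q = ea;  // via the conversion operator above
+ //   // m and q.toRotationMatrix() represent the same rotation.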
+
+ friend std::ostream& operator<<(std::ostream& s, const EulerAngles<Scalar, System>& eulerAngles)
+ {
+ s << eulerAngles.angles().transpose();
+ return s;
+ }
+ };
+
+#define EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(AXES, SCALAR_TYPE, SCALAR_POSTFIX) \
+ /** \ingroup EulerAngles_Module */ \
+ typedef EulerAngles<SCALAR_TYPE, EulerSystem##AXES> EulerAngles##AXES##SCALAR_POSTFIX;
+
+#define EIGEN_EULER_ANGLES_TYPEDEFS(SCALAR_TYPE, SCALAR_POSTFIX) \
+ EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(XYZ, SCALAR_TYPE, SCALAR_POSTFIX) \
+ EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(XYX, SCALAR_TYPE, SCALAR_POSTFIX) \
+ EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(XZY, SCALAR_TYPE, SCALAR_POSTFIX) \
+ EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(XZX, SCALAR_TYPE, SCALAR_POSTFIX) \
+ \
+ EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(YZX, SCALAR_TYPE, SCALAR_POSTFIX) \
+ EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(YZY, SCALAR_TYPE, SCALAR_POSTFIX) \
+ EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(YXZ, SCALAR_TYPE, SCALAR_POSTFIX) \
+ EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(YXY, SCALAR_TYPE, SCALAR_POSTFIX) \
+ \
+ EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(ZXY, SCALAR_TYPE, SCALAR_POSTFIX) \
+ EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(ZXZ, SCALAR_TYPE, SCALAR_POSTFIX) \
+ EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(ZYX, SCALAR_TYPE, SCALAR_POSTFIX) \
+ EIGEN_EULER_ANGLES_SINGLE_TYPEDEF(ZYZ, SCALAR_TYPE, SCALAR_POSTFIX)
+
+EIGEN_EULER_ANGLES_TYPEDEFS(float, f)
+EIGEN_EULER_ANGLES_TYPEDEFS(double, d)
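+
+// For example, EulerAnglesZYXd above is EulerAngles<double, EulerSystemZYX>.
+// A minimal usage sketch (hypothetical values):
+//   EulerAnglesZYXd anglesZYX(0.1, 0.2, 0.3);
+//   Vector3d v = anglesZYX.angles(); // (0.1, 0.2, 0.3)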
+
+ namespace internal
+ {
+ template<typename _Scalar, class _System>
+ struct traits<EulerAngles<_Scalar, _System> >
+ {
+ typedef _Scalar Scalar;
+ };
+ }
+
+}
+
+#endif // EIGEN_EULERANGLESCLASS_H
diff --git a/unsupported/Eigen/src/EulerAngles/EulerSystem.h b/unsupported/Eigen/src/EulerAngles/EulerSystem.h
new file mode 100644
index 000000000..82243e643
--- /dev/null
+++ b/unsupported/Eigen/src/EulerAngles/EulerSystem.h
@@ -0,0 +1,316 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Tal Hadad <tal_hd@hotmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_EULERSYSTEM_H
+#define EIGEN_EULERSYSTEM_H
+
+namespace Eigen
+{
+ // Forward declarations
+ template <typename _Scalar, class _System>
+ class EulerAngles;
+
+ namespace internal
+ {
+ // TODO: Check if this already exists elsewhere in the API
+ template <int Num, bool IsPositive = (Num > 0)>
+ struct Abs
+ {
+ enum { value = Num };
+ };
+
+ template <int Num>
+ struct Abs<Num, false>
+ {
+ enum { value = -Num };
+ };
+
+ template <int Axis>
+ struct IsValidAxis
+ {
+ enum { value = Axis != 0 && Abs<Axis>::value <= 3 };
+ };
+ }
+
+ #define EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT(COND,MSG) typedef char static_assertion_##MSG[(COND)?1:-1]
+
+ /** \brief Representation of a fixed signed rotation axis for EulerSystem.
+ *
+ * \ingroup EulerAngles_Module
+ *
+ * Values here represent:
+ * - The axis of the rotation: X, Y or Z.
+ * - The sign (i.e. the direction of the rotation along the axis): positive (+) or negative (-)
+ *
+ * Therefore, this can express all the axes {+X,+Y,+Z,-X,-Y,-Z}.
+ *
+ * For a positive axis, use +EULER_{axis}, and for a negative axis use -EULER_{axis}.
+ */
+ enum EulerAxis
+ {
+ EULER_X = 1, /*!< the X axis */
+ EULER_Y = 2, /*!< the Y axis */
+ EULER_Z = 3 /*!< the Z axis */
+ };
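+
+ // A short sketch of a signed-axes system (hypothetical typedef name), using the
+ // EulerSystem class defined below: the first rotation is about the negative Z
+ // axis, then +Y, then +X:
+ //   typedef EulerSystem<-EULER_Z, EULER_Y, EULER_X> CompassLikeSystem;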
+
+ /** \class EulerSystem
+ *
+ * \ingroup EulerAngles_Module
+ *
+ * \brief Represents a fixed Euler rotation system.
+ *
+ * The goal of this meta-class is to represent the Euler system at compile time, for EulerAngles.
+ *
+ * You can use this class for two things:
+ * - Build an Euler system, and then pass it as a template parameter to EulerAngles.
+ * - Query compile-time data about an Euler system (e.g. whether it's Tait-Bryan).
+ *
+ * An Euler rotation is a set of three rotations about fixed axes (see \ref EulerAngles).
+ * This meta-class stores those signed axes as compile-time constants (see \ref EulerAxis).
+ *
+ * ### Types of Euler systems ###
+ *
+ * All and only the valid 3-dimensional Euler rotations over the standard
+ * signed axes {+X,+Y,+Z,-X,-Y,-Z} are supported:
+ * - all axes X, Y, Z in every valid order (see below which orders are valid),
+ * - rotation about each axis in both the positive and the negative direction,
+ * - both Tait-Bryan and proper/classic Euler angles (i.e. the opposite).
+ *
+ * Since EulerSystem supports both positive and negative directions,
+ * you may know this distinction under other names:
+ * - _right handed_ or _left handed_
+ * - _counterclockwise_ or _clockwise_
+ *
+ * Note that not all axis combinations are valid; an invalid combination triggers a static assertion.
+ * The same unsigned axis can't appear twice in a row, e.g. {X,X,Y} is invalid.
+ * This yields exactly two classes:
+ * - _Tait-Bryan_ - all unsigned axes are distinct, e.g. {X,Y,Z}
+ * - _proper/classic Euler angles_ - the first and the third unsigned axes are equal,
+ *    and the second is different, e.g. {X,Y,X}
+ *
+ * ### Intrinsic vs extrinsic Euler systems ###
+ *
+ * Only intrinsic Euler systems are supported, for simplicity.
+ * If you want an extrinsic Euler system,
+ * just use the equivalent intrinsic system with the opposite order of axes and angles.
+ * I.e. axes (A,B,C) become (C,B,A), and angles (a,b,c) become (c,b,a).
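+ *
+ * For example (a hand-worked sketch): an extrinsic rotation about the fixed axes
+ * X, then Y, then Z by angles (a,b,c) equals the intrinsic EulerSystemZYX rotation
+ * by angles (c,b,a), since both compose to Rz(c) * Ry(b) * Rx(a).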
+ *
+ * ### Convenient user typedefs ###
+ *
+ * Convenient typedefs for EulerSystem exist (only for positive-axes Euler systems),
+ * in the form EulerSystem{A}{B}{C}, e.g. \ref EulerSystemXYZ.
+ *
+ * ### Additional reading ###
+ *
+ * More information about Euler angles: https://en.wikipedia.org/wiki/Euler_angles
+ *
+ * \tparam _AlphaAxis the first fixed EulerAxis
+ *
+ * \tparam _BetaAxis the second fixed EulerAxis
+ *
+ * \tparam _GammaAxis the third fixed EulerAxis
+ */
+ template <int _AlphaAxis, int _BetaAxis, int _GammaAxis>
+ class EulerSystem
+ {
+ public:
+ // These are defined this way, and not as an enum, because
+ // enums are not guaranteed to support negative numbers.
+
+ /** The first rotation axis */
+ static const int AlphaAxis = _AlphaAxis;
+
+ /** The second rotation axis */
+ static const int BetaAxis = _BetaAxis;
+
+ /** The third rotation axis */
+ static const int GammaAxis = _GammaAxis;
+
+ enum
+ {
+ AlphaAxisAbs = internal::Abs<AlphaAxis>::value, /*!< the first rotation axis unsigned */
+ BetaAxisAbs = internal::Abs<BetaAxis>::value, /*!< the second rotation axis unsigned */
+ GammaAxisAbs = internal::Abs<GammaAxis>::value, /*!< the third rotation axis unsigned */
+
+ IsAlphaOpposite = (AlphaAxis < 0) ? 1 : 0, /*!< whether the alpha axis is negative */
+ IsBetaOpposite = (BetaAxis < 0) ? 1 : 0, /*!< whether the beta axis is negative */
+ IsGammaOpposite = (GammaAxis < 0) ? 1 : 0, /*!< whether the gamma axis is negative */
+
+ IsOdd = ((AlphaAxisAbs)%3 == (BetaAxisAbs - 1)%3) ? 0 : 1, /*!< whether the Euler system is odd */
+ IsEven = IsOdd ? 0 : 1, /*!< whether the Euler system is even */
+
+ IsTaitBryan = ((unsigned)AlphaAxisAbs != (unsigned)GammaAxisAbs) ? 1 : 0 /*!< whether the Euler system is Tait-Bryan */
+ };
+
+ private:
+
+ EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT(internal::IsValidAxis<AlphaAxis>::value,
+ ALPHA_AXIS_IS_INVALID);
+
+ EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT(internal::IsValidAxis<BetaAxis>::value,
+ BETA_AXIS_IS_INVALID);
+
+ EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT(internal::IsValidAxis<GammaAxis>::value,
+ GAMMA_AXIS_IS_INVALID);
+
+ EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT((unsigned)AlphaAxisAbs != (unsigned)BetaAxisAbs,
+ ALPHA_AXIS_CANT_BE_EQUAL_TO_BETA_AXIS);
+
+ EIGEN_EULER_ANGLES_CLASS_STATIC_ASSERT((unsigned)BetaAxisAbs != (unsigned)GammaAxisAbs,
+ BETA_AXIS_CANT_BE_EQUAL_TO_GAMMA_AXIS);
+
+ enum
+ {
+ // I, J, K are the pivot indices of the rotation-matrix permutation that matches this Euler system.
+ // They are used by this class's converters.
+ // They are always different from each other, and their possible values are: 0, 1, or 2.
+ I = AlphaAxisAbs - 1,
+ J = (AlphaAxisAbs - 1 + 1 + IsOdd)%3,
+ K = (AlphaAxisAbs - 1 + 2 - IsOdd)%3
+ };
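+
+ // Hand-worked examples of the formulas above (for illustration only):
+ //   EulerSystemXYZ: AlphaAxisAbs=1, IsOdd=0  =>  (I,J,K) = (0,1,2)
+ //   EulerSystemZYX: AlphaAxisAbs=3, IsOdd=1  =>  (I,J,K) = (2,1,0)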
+
+ // TODO: Take the @mat parameter in a form that avoids double evaluation.
+ template <typename Derived>
+ static void CalcEulerAngles_imp(Matrix<typename MatrixBase<Derived>::Scalar, 3, 1>& res, const MatrixBase<Derived>& mat, internal::true_type /*isTaitBryan*/)
+ {
+ using std::atan2;
+ using std::sin;
+ using std::cos;
+
+ typedef typename Derived::Scalar Scalar;
+ typedef Matrix<Scalar,2,1> Vector2;
+
+ res[0] = atan2(mat(J,K), mat(K,K));
+ Scalar c2 = Vector2(mat(I,I), mat(I,J)).norm();
+ if((IsOdd && res[0]<Scalar(0)) || ((!IsOdd) && res[0]>Scalar(0))) {
+ res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI);
+ res[1] = atan2(-mat(I,K), -c2);
+ }
+ else
+ res[1] = atan2(-mat(I,K), c2);
+ Scalar s1 = sin(res[0]);
+ Scalar c1 = cos(res[0]);
+ res[2] = atan2(s1*mat(K,I)-c1*mat(J,I), c1*mat(J,J) - s1 * mat(K,J));
+ }
+
+ template <typename Derived>
+ static void CalcEulerAngles_imp(Matrix<typename MatrixBase<Derived>::Scalar,3,1>& res, const MatrixBase<Derived>& mat, internal::false_type /*isTaitBryan*/)
+ {
+ using std::atan2;
+ using std::sin;
+ using std::cos;
+
+ typedef typename Derived::Scalar Scalar;
+ typedef Matrix<Scalar,2,1> Vector2;
+
+ res[0] = atan2(mat(J,I), mat(K,I));
+ if((IsOdd && res[0]<Scalar(0)) || ((!IsOdd) && res[0]>Scalar(0)))
+ {
+ res[0] = (res[0] > Scalar(0)) ? res[0] - Scalar(EIGEN_PI) : res[0] + Scalar(EIGEN_PI);
+ Scalar s2 = Vector2(mat(J,I), mat(K,I)).norm();
+ res[1] = -atan2(s2, mat(I,I));
+ }
+ else
+ {
+ Scalar s2 = Vector2(mat(J,I), mat(K,I)).norm();
+ res[1] = atan2(s2, mat(I,I));
+ }
+
+ // With a=(0,1,0), we have i=0; j=1; k=2, and after computing the first two angles,
+ // we can compute their respective rotation, and apply its inverse to M. Since the result must
+ // be a rotation around x, we have:
+ //
+ // c2 s1.s2 c1.s2 1 0 0
+ // 0 c1 -s1 * M = 0 c3 s3
+ // -s2 s1.c2 c1.c2 0 -s3 c3
+ //
+ // Thus: m11.c1 - m21.s1 = c3 & m12.c1 - m22.s1 = s3
+
+ Scalar s1 = sin(res[0]);
+ Scalar c1 = cos(res[0]);
+ res[2] = atan2(c1*mat(J,K)-s1*mat(K,K), c1*mat(J,J) - s1 * mat(K,J));
+ }
+
+ template<typename Scalar>
+ static void CalcEulerAngles(
+ EulerAngles<Scalar, EulerSystem>& res,
+ const typename EulerAngles<Scalar, EulerSystem>::Matrix3& mat)
+ {
+ CalcEulerAngles(res, mat, false, false, false);
+ }
+
+ template<
+ bool PositiveRangeAlpha,
+ bool PositiveRangeBeta,
+ bool PositiveRangeGamma,
+ typename Scalar>
+ static void CalcEulerAngles(
+ EulerAngles<Scalar, EulerSystem>& res,
+ const typename EulerAngles<Scalar, EulerSystem>::Matrix3& mat)
+ {
+ CalcEulerAngles(res, mat, PositiveRangeAlpha, PositiveRangeBeta, PositiveRangeGamma);
+ }
+
+ template<typename Scalar>
+ static void CalcEulerAngles(
+ EulerAngles<Scalar, EulerSystem>& res,
+ const typename EulerAngles<Scalar, EulerSystem>::Matrix3& mat,
+ bool PositiveRangeAlpha,
+ bool PositiveRangeBeta,
+ bool PositiveRangeGamma)
+ {
+ CalcEulerAngles_imp(
+ res.angles(), mat,
+ typename internal::conditional<IsTaitBryan, internal::true_type, internal::false_type>::type());
+
+ if (IsAlphaOpposite == IsOdd)
+ res.alpha() = -res.alpha();
+
+ if (IsBetaOpposite == IsOdd)
+ res.beta() = -res.beta();
+
+ if (IsGammaOpposite == IsOdd)
+ res.gamma() = -res.gamma();
+
+ // Saturate results to the requested range
+ if (PositiveRangeAlpha && (res.alpha() < 0))
+ res.alpha() += Scalar(2 * EIGEN_PI);
+
+ if (PositiveRangeBeta && (res.beta() < 0))
+ res.beta() += Scalar(2 * EIGEN_PI);
+
+ if (PositiveRangeGamma && (res.gamma() < 0))
+ res.gamma() += Scalar(2 * EIGEN_PI);
+ }
+
+ template <typename _Scalar, class _System>
+ friend class Eigen::EulerAngles;
+ };
+
+#define EIGEN_EULER_SYSTEM_TYPEDEF(A, B, C) \
+ /** \ingroup EulerAngles_Module */ \
+ typedef EulerSystem<EULER_##A, EULER_##B, EULER_##C> EulerSystem##A##B##C;
+
+ EIGEN_EULER_SYSTEM_TYPEDEF(X,Y,Z)
+ EIGEN_EULER_SYSTEM_TYPEDEF(X,Y,X)
+ EIGEN_EULER_SYSTEM_TYPEDEF(X,Z,Y)
+ EIGEN_EULER_SYSTEM_TYPEDEF(X,Z,X)
+
+ EIGEN_EULER_SYSTEM_TYPEDEF(Y,Z,X)
+ EIGEN_EULER_SYSTEM_TYPEDEF(Y,Z,Y)
+ EIGEN_EULER_SYSTEM_TYPEDEF(Y,X,Z)
+ EIGEN_EULER_SYSTEM_TYPEDEF(Y,X,Y)
+
+ EIGEN_EULER_SYSTEM_TYPEDEF(Z,X,Y)
+ EIGEN_EULER_SYSTEM_TYPEDEF(Z,X,Z)
+ EIGEN_EULER_SYSTEM_TYPEDEF(Z,Y,X)
+ EIGEN_EULER_SYSTEM_TYPEDEF(Z,Y,Z)
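+
+ // A compile-time query sketch using the typedefs above (hand-checked values):
+ //   EulerSystemXYZ::IsTaitBryan == 1  (all unsigned axes distinct)
+ //   EulerSystemZYZ::IsTaitBryan == 0  (proper/classic Euler angles)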
+}
+
+#endif // EIGEN_EULERSYSTEM_H
diff --git a/unsupported/Eigen/src/FFT/CMakeLists.txt b/unsupported/Eigen/src/FFT/CMakeLists.txt
deleted file mode 100644
index edcffcb18..000000000
--- a/unsupported/Eigen/src/FFT/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_FFT_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_FFT_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/FFT COMPONENT Devel
- )
diff --git a/unsupported/Eigen/src/IterativeSolvers/CMakeLists.txt b/unsupported/Eigen/src/IterativeSolvers/CMakeLists.txt
deleted file mode 100644
index 7986afc5e..000000000
--- a/unsupported/Eigen/src/IterativeSolvers/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_IterativeSolvers_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_IterativeSolvers_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/IterativeSolvers COMPONENT Devel
- )
diff --git a/unsupported/Eigen/src/KroneckerProduct/CMakeLists.txt b/unsupported/Eigen/src/KroneckerProduct/CMakeLists.txt
deleted file mode 100644
index 4daefebee..000000000
--- a/unsupported/Eigen/src/KroneckerProduct/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_KroneckerProduct_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_KroneckerProduct_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/KroneckerProduct COMPONENT Devel
- )
diff --git a/unsupported/Eigen/src/LevenbergMarquardt/CMakeLists.txt b/unsupported/Eigen/src/LevenbergMarquardt/CMakeLists.txt
deleted file mode 100644
index d9690854d..000000000
--- a/unsupported/Eigen/src/LevenbergMarquardt/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_LevenbergMarquardt_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_LevenbergMarquardt_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/LevenbergMarquardt COMPONENT Devel
- )
diff --git a/unsupported/Eigen/src/MatrixFunctions/CMakeLists.txt b/unsupported/Eigen/src/MatrixFunctions/CMakeLists.txt
deleted file mode 100644
index cdde64d2c..000000000
--- a/unsupported/Eigen/src/MatrixFunctions/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_MatrixFunctions_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_MatrixFunctions_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/MatrixFunctions COMPONENT Devel
- )
diff --git a/unsupported/Eigen/src/MoreVectorization/CMakeLists.txt b/unsupported/Eigen/src/MoreVectorization/CMakeLists.txt
deleted file mode 100644
index 1b887cc8e..000000000
--- a/unsupported/Eigen/src/MoreVectorization/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_MoreVectorization_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_MoreVectorization_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/MoreVectorization COMPONENT Devel
- )
diff --git a/unsupported/Eigen/src/NonLinearOptimization/CMakeLists.txt b/unsupported/Eigen/src/NonLinearOptimization/CMakeLists.txt
deleted file mode 100644
index 9322ddadf..000000000
--- a/unsupported/Eigen/src/NonLinearOptimization/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_NonLinearOptimization_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_NonLinearOptimization_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/NonLinearOptimization COMPONENT Devel
- )
diff --git a/unsupported/Eigen/src/NumericalDiff/CMakeLists.txt b/unsupported/Eigen/src/NumericalDiff/CMakeLists.txt
deleted file mode 100644
index 1199aca2f..000000000
--- a/unsupported/Eigen/src/NumericalDiff/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_NumericalDiff_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_NumericalDiff_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/NumericalDiff COMPONENT Devel
- )
diff --git a/unsupported/Eigen/src/Polynomials/CMakeLists.txt b/unsupported/Eigen/src/Polynomials/CMakeLists.txt
deleted file mode 100644
index 51f13f3cb..000000000
--- a/unsupported/Eigen/src/Polynomials/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Polynomials_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_Polynomials_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/Polynomials COMPONENT Devel
- )
diff --git a/unsupported/Eigen/src/Skyline/CMakeLists.txt b/unsupported/Eigen/src/Skyline/CMakeLists.txt
deleted file mode 100644
index 3bf1b0dd4..000000000
--- a/unsupported/Eigen/src/Skyline/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Skyline_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_Skyline_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/Skyline COMPONENT Devel
- )
diff --git a/unsupported/Eigen/src/SparseExtra/CMakeLists.txt b/unsupported/Eigen/src/SparseExtra/CMakeLists.txt
deleted file mode 100644
index 7ea32ca5e..000000000
--- a/unsupported/Eigen/src/SparseExtra/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_SparseExtra_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_SparseExtra_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/SparseExtra COMPONENT Devel
- )
diff --git a/unsupported/Eigen/src/SpecialFunctions/CMakeLists.txt b/unsupported/Eigen/src/SpecialFunctions/CMakeLists.txt
deleted file mode 100644
index 25df9439d..000000000
--- a/unsupported/Eigen/src/SpecialFunctions/CMakeLists.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-FILE(GLOB Eigen_SpecialFunctions_SRCS "*.h")
-INSTALL(FILES
- ${Eigen_SpecialFunctions_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/SpecialFunctions COMPONENT Devel
- )
-
-FILE(GLOB Eigen_SpecialFunctions_arch_CUDA_SRCS "arch/CUDA/*.h")
-INSTALL(FILES
- ${Eigen_SpecialFunctions_arch_CUDA_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/SpecialFunctions/arch/CUDA COMPONENT Devel
- ) \ No newline at end of file
diff --git a/unsupported/Eigen/src/Splines/CMakeLists.txt b/unsupported/Eigen/src/Splines/CMakeLists.txt
deleted file mode 100644
index 55c6271e9..000000000
--- a/unsupported/Eigen/src/Splines/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-FILE(GLOB Eigen_Splines_SRCS "*.h")
-
-INSTALL(FILES
- ${Eigen_Splines_SRCS}
- DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/Splines COMPONENT Devel
- )
diff --git a/unsupported/doc/examples/EulerAngles.cpp b/unsupported/doc/examples/EulerAngles.cpp
new file mode 100644
index 000000000..1ef6aee18
--- /dev/null
+++ b/unsupported/doc/examples/EulerAngles.cpp
@@ -0,0 +1,46 @@
+#include <unsupported/Eigen/EulerAngles>
+#include <iostream>
+
+using namespace Eigen;
+
+int main()
+{
+ // A common Euler system used by many armies around the world,
+ // where the first angle is the azimuth (the angle from the north -
+ // the same angle that is shown on a compass),
+ // the second is the elevation (the angle from the horizon),
+ // and the third is the roll (the angle between the horizontal body
+ // direction and the plane of the ground surface).
+ // Keep in mind that we're using radians here!
+ typedef EulerSystem<-EULER_Z, EULER_Y, EULER_X> MyArmySystem;
+ typedef EulerAngles<double, MyArmySystem> MyArmyAngles;
+
+ MyArmyAngles vehicleAngles(
+ 3.14/*PI*/ / 2, /* heading to east, notice that this angle is counter-clockwise */
+ -0.3, /* going down from a mountain */
+ 0.1); /* slightly rolled to the right */
+
+ // Some Euler angles representation that our plane uses.
+ EulerAnglesZYZd planeAngles(0.78474, 0.5271, -0.513794);
+
+ MyArmyAngles planeAnglesInMyArmyAngles = MyArmyAngles::FromRotation<true, false, false>(planeAngles);
+
+ std::cout << "vehicle angles(MyArmy): " << vehicleAngles << std::endl;
+ std::cout << "plane angles(ZYZ): " << planeAngles << std::endl;
+ std::cout << "plane angles(MyArmy): " << planeAnglesInMyArmyAngles << std::endl;
+
+ // Now let's rotate the plane a little bit
+ std::cout << "==========================================================\n";
+ std::cout << "rotating plane now!\n";
+ std::cout << "==========================================================\n";
+
+ Quaterniond planeRotated = AngleAxisd(-0.342, Vector3d::UnitY()) * planeAngles;
+
+ planeAngles = planeRotated;
+ planeAnglesInMyArmyAngles = MyArmyAngles::FromRotation<true, false, false>(planeRotated);
+
+ std::cout << "new plane angles(ZYZ): " << planeAngles << std::endl;
+ std::cout << "new plane angles(MyArmy): " << planeAnglesInMyArmyAngles << std::endl;
+
+ return 0;
+}
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index de9b5243a..0d7ed1db2 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -59,6 +59,8 @@ ei_add_test(alignedvector3)
ei_add_test(FFT)
+ei_add_test(EulerAngles)
+
find_package(MPFR 2.3.0)
find_package(GMP)
if(MPFR_FOUND AND EIGEN_COMPILER_SUPPORT_CXX11)
@@ -230,20 +232,25 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
- ei_add_test(cxx11_tensor_device)
- ei_add_test(cxx11_tensor_cuda)
- ei_add_test(cxx11_tensor_contract_cuda)
+ ei_add_test(cxx11_tensor_complex_cuda)
ei_add_test(cxx11_tensor_reduction_cuda)
ei_add_test(cxx11_tensor_argmax_cuda)
ei_add_test(cxx11_tensor_cast_float16_cuda)
ei_add_test(cxx11_tensor_scan_cuda)
+ # Contractions require arch 3.0 or higher
+ if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 29)
+ ei_add_test(cxx11_tensor_device)
+ ei_add_test(cxx11_tensor_cuda)
+ ei_add_test(cxx11_tensor_contract_cuda)
+ ei_add_test(cxx11_tensor_of_float16_cuda)
+ endif()
+
# The random number generation code requires arch 3.5 or greater.
if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 34)
ei_add_test(cxx11_tensor_random_cuda)
endif()
- ei_add_test(cxx11_tensor_of_float16_cuda)
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
endif()
diff --git a/unsupported/test/EulerAngles.cpp b/unsupported/test/EulerAngles.cpp
new file mode 100644
index 000000000..a8cb52864
--- /dev/null
+++ b/unsupported/test/EulerAngles.cpp
@@ -0,0 +1,208 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Tal Hadad <tal_hd@hotmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+
+#include <unsupported/Eigen/EulerAngles>
+
+using namespace Eigen;
+
+template<typename EulerSystem, typename Scalar>
+void verify_euler_ranged(const Matrix<Scalar,3,1>& ea,
+ bool positiveRangeAlpha, bool positiveRangeBeta, bool positiveRangeGamma)
+{
+ typedef EulerAngles<Scalar, EulerSystem> EulerAnglesType;
+ typedef Matrix<Scalar,3,3> Matrix3;
+ typedef Matrix<Scalar,3,1> Vector3;
+ typedef Quaternion<Scalar> QuaternionType;
+ typedef AngleAxis<Scalar> AngleAxisType;
+ using std::abs;
+
+ Scalar alphaRangeStart, alphaRangeEnd;
+ Scalar betaRangeStart, betaRangeEnd;
+ Scalar gammaRangeStart, gammaRangeEnd;
+
+ if (positiveRangeAlpha)
+ {
+ alphaRangeStart = Scalar(0);
+ alphaRangeEnd = Scalar(2 * EIGEN_PI);
+ }
+ else
+ {
+ alphaRangeStart = -Scalar(EIGEN_PI);
+ alphaRangeEnd = Scalar(EIGEN_PI);
+ }
+
+ if (positiveRangeBeta)
+ {
+ betaRangeStart = Scalar(0);
+ betaRangeEnd = Scalar(2 * EIGEN_PI);
+ }
+ else
+ {
+ betaRangeStart = -Scalar(EIGEN_PI);
+ betaRangeEnd = Scalar(EIGEN_PI);
+ }
+
+ if (positiveRangeGamma)
+ {
+ gammaRangeStart = Scalar(0);
+ gammaRangeEnd = Scalar(2 * EIGEN_PI);
+ }
+ else
+ {
+ gammaRangeStart = -Scalar(EIGEN_PI);
+ gammaRangeEnd = Scalar(EIGEN_PI);
+ }
+
+ const int i = EulerSystem::AlphaAxisAbs - 1;
+ const int j = EulerSystem::BetaAxisAbs - 1;
+ const int k = EulerSystem::GammaAxisAbs - 1;
+
+ const int iFactor = EulerSystem::IsAlphaOpposite ? -1 : 1;
+ const int jFactor = EulerSystem::IsBetaOpposite ? -1 : 1;
+ const int kFactor = EulerSystem::IsGammaOpposite ? -1 : 1;
+
+ const Vector3 I = EulerAnglesType::AlphaAxisVector();
+ const Vector3 J = EulerAnglesType::BetaAxisVector();
+ const Vector3 K = EulerAnglesType::GammaAxisVector();
+
+ EulerAnglesType e(ea[0], ea[1], ea[2]);
+
+ Matrix3 m(e);
+ Vector3 eabis = EulerAnglesType(m, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma).angles();
+
+ // Check that eabis is in range
+ VERIFY(alphaRangeStart <= eabis[0] && eabis[0] <= alphaRangeEnd);
+ VERIFY(betaRangeStart <= eabis[1] && eabis[1] <= betaRangeEnd);
+ VERIFY(gammaRangeStart <= eabis[2] && eabis[2] <= gammaRangeEnd);
+
+ Vector3 eabis2 = m.eulerAngles(i, j, k);
+
+ // Invert the relevant axes
+ eabis2[0] *= iFactor;
+ eabis2[1] *= jFactor;
+ eabis2[2] *= kFactor;
+
+ // Saturate the angles to the correct range
+ if (positiveRangeAlpha && (eabis2[0] < 0))
+ eabis2[0] += Scalar(2 * EIGEN_PI);
+ if (positiveRangeBeta && (eabis2[1] < 0))
+ eabis2[1] += Scalar(2 * EIGEN_PI);
+ if (positiveRangeGamma && (eabis2[2] < 0))
+ eabis2[2] += Scalar(2 * EIGEN_PI);
+
+ VERIFY_IS_APPROX(eabis, eabis2); // Verify that our estimate matches what m.eulerAngles() returns
+
+ Matrix3 mbis(AngleAxisType(eabis[0], I) * AngleAxisType(eabis[1], J) * AngleAxisType(eabis[2], K));
+ VERIFY_IS_APPROX(m, mbis);
+
+ // Tests that are only relevant when no positive range is requested
+ if (!(positiveRangeAlpha || positiveRangeBeta || positiveRangeGamma))
+ {
+ /* If I==K and ea[1]==0, there is no unique solution. */
+ /* The same remark applies when I!=K and |ea[1]| is close to pi/2. */
+ if( (i!=k || ea[1]!=0) && (i==k || !internal::isApprox(abs(ea[1]),Scalar(EIGEN_PI/2),test_precision<Scalar>())) )
+ VERIFY((ea-eabis).norm() <= test_precision<Scalar>());
+
+ // approx_or_less_than does not work for 0
+ VERIFY(0 < eabis[0] || test_isMuchSmallerThan(eabis[0], Scalar(1)));
+ }
+
+ // Quaternions
+ QuaternionType q(e);
+ eabis = EulerAnglesType(q, positiveRangeAlpha, positiveRangeBeta, positiveRangeGamma).angles();
+ VERIFY_IS_APPROX(eabis, eabis2); // Verify that the Euler angles are still the same
+}
+
+template<typename EulerSystem, typename Scalar>
+void verify_euler(const Matrix<Scalar,3,1>& ea)
+{
+ verify_euler_ranged<EulerSystem>(ea, false, false, false);
+ verify_euler_ranged<EulerSystem>(ea, false, false, true);
+ verify_euler_ranged<EulerSystem>(ea, false, true, false);
+ verify_euler_ranged<EulerSystem>(ea, false, true, true);
+ verify_euler_ranged<EulerSystem>(ea, true, false, false);
+ verify_euler_ranged<EulerSystem>(ea, true, false, true);
+ verify_euler_ranged<EulerSystem>(ea, true, true, false);
+ verify_euler_ranged<EulerSystem>(ea, true, true, true);
+}
+
+template<typename Scalar> void check_all_var(const Matrix<Scalar,3,1>& ea)
+{
+ verify_euler<EulerSystemXYZ>(ea);
+ verify_euler<EulerSystemXYX>(ea);
+ verify_euler<EulerSystemXZY>(ea);
+ verify_euler<EulerSystemXZX>(ea);
+
+ verify_euler<EulerSystemYZX>(ea);
+ verify_euler<EulerSystemYZY>(ea);
+ verify_euler<EulerSystemYXZ>(ea);
+ verify_euler<EulerSystemYXY>(ea);
+
+ verify_euler<EulerSystemZXY>(ea);
+ verify_euler<EulerSystemZXZ>(ea);
+ verify_euler<EulerSystemZYX>(ea);
+ verify_euler<EulerSystemZYZ>(ea);
+}
+
+template<typename Scalar> void eulerangles()
+{
+ typedef Matrix<Scalar,3,3> Matrix3;
+ typedef Matrix<Scalar,3,1> Vector3;
+ typedef Array<Scalar,3,1> Array3;
+ typedef Quaternion<Scalar> Quaternionx;
+ typedef AngleAxis<Scalar> AngleAxisType;
+
+ Scalar a = internal::random<Scalar>(-Scalar(EIGEN_PI), Scalar(EIGEN_PI));
+ Quaternionx q1;
+ q1 = AngleAxisType(a, Vector3::Random().normalized());
+ Matrix3 m;
+ m = q1;
+
+ Vector3 ea = m.eulerAngles(0,1,2);
+ check_all_var(ea);
+ ea = m.eulerAngles(0,1,0);
+ check_all_var(ea);
+
+ // Check with purely random Quaternion:
+ q1.coeffs() = Quaternionx::Coefficients::Random().normalized();
+ m = q1;
+ ea = m.eulerAngles(0,1,2);
+ check_all_var(ea);
+ ea = m.eulerAngles(0,1,0);
+ check_all_var(ea);
+
+ // Check with random angles in range [0:pi]x[-pi:pi]x[-pi:pi].
+ ea = (Array3::Random() + Array3(1,0,0))*Scalar(EIGEN_PI)*Array3(0.5,1,1);
+ check_all_var(ea);
+
+ ea[2] = ea[0] = internal::random<Scalar>(0,Scalar(EIGEN_PI));
+ check_all_var(ea);
+
+ ea[0] = ea[1] = internal::random<Scalar>(0,Scalar(EIGEN_PI));
+ check_all_var(ea);
+
+ ea[1] = 0;
+ check_all_var(ea);
+
+ ea.head(2).setZero();
+ check_all_var(ea);
+
+ ea.setZero();
+ check_all_var(ea);
+}
+
+void test_EulerAngles()
+{
+ for(int i = 0; i < g_repeat; i++) {
+ CALL_SUBTEST_1( eulerangles<float>() );
+ CALL_SUBTEST_2( eulerangles<double>() );
+ }
+}
diff --git a/unsupported/test/cxx11_eventcount.cpp b/unsupported/test/cxx11_eventcount.cpp
index f16cc6f07..3b598bf42 100644
--- a/unsupported/test/cxx11_eventcount.cpp
+++ b/unsupported/test/cxx11_eventcount.cpp
@@ -25,7 +25,8 @@ int rand_reentrant(unsigned int* s) {
static void test_basic_eventcount()
{
- std::vector<EventCount::Waiter> waiters(1);
+ MaxSizeVector<EventCount::Waiter> waiters(1);
+ waiters.resize(1);
EventCount ec(waiters);
EventCount::Waiter& w = waiters[0];
ec.Notify(false);
@@ -81,7 +82,8 @@ static void test_stress_eventcount()
static const int kEvents = 1 << 16;
static const int kQueues = 10;
- std::vector<EventCount::Waiter> waiters(kThreads);
+ MaxSizeVector<EventCount::Waiter> waiters(kThreads);
+ waiters.resize(kThreads);
EventCount ec(waiters);
TestQueue queues[kQueues];
diff --git a/unsupported/test/cxx11_tensor_argmax_cuda.cu b/unsupported/test/cxx11_tensor_argmax_cuda.cu
index 41ccbe974..6fe8982f2 100644
--- a/unsupported/test/cxx11_tensor_argmax_cuda.cu
+++ b/unsupported/test/cxx11_tensor_argmax_cuda.cu
@@ -12,6 +12,9 @@
#define EIGEN_TEST_FUNC cxx11_tensor_cuda
#define EIGEN_USE_GPU
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
diff --git a/unsupported/test/cxx11_tensor_cast_float16_cuda.cu b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu
index f22b99de8..88c233994 100644
--- a/unsupported/test/cxx11_tensor_cast_float16_cuda.cu
+++ b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu
@@ -13,7 +13,9 @@
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
-
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
diff --git a/unsupported/test/cxx11_tensor_complex_cuda.cu b/unsupported/test/cxx11_tensor_complex_cuda.cu
new file mode 100644
index 000000000..74befe670
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_complex_cuda.cu
@@ -0,0 +1,78 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_FUNC cxx11_tensor_complex
+#define EIGEN_USE_GPU
+
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+void test_cuda_nullary() {
+ Tensor<std::complex<float>, 1, 0, int> in1(2);
+ Tensor<std::complex<float>, 1, 0, int> in2(2);
+ in1.setRandom();
+ in2.setRandom();
+
+ std::size_t float_bytes = in1.size() * sizeof(float);
+ std::size_t complex_bytes = in1.size() * sizeof(std::complex<float>);
+
+ std::complex<float>* d_in1;
+ std::complex<float>* d_in2;
+ float* d_out2;
+ cudaMalloc((void**)(&d_in1), complex_bytes);
+ cudaMalloc((void**)(&d_in2), complex_bytes);
+ cudaMalloc((void**)(&d_out2), float_bytes);
+ cudaMemcpy(d_in1, in1.data(), complex_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in2, in2.data(), complex_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<std::complex<float>, 1, 0, int>, Eigen::Aligned> gpu_in1(
+ d_in1, 2);
+ Eigen::TensorMap<Eigen::Tensor<std::complex<float>, 1, 0, int>, Eigen::Aligned> gpu_in2(
+ d_in2, 2);
+ Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_out2(
+ d_out2, 2);
+
+ gpu_in1.device(gpu_device) = gpu_in1.constant(std::complex<float>(3.14f, 2.7f));
+ gpu_out2.device(gpu_device) = gpu_in2.abs();
+
+ Tensor<std::complex<float>, 1, 0, int> new1(2);
+ Tensor<float, 1, 0, int> new2(2);
+
+ assert(cudaMemcpyAsync(new1.data(), d_in1, complex_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+ assert(cudaMemcpyAsync(new2.data(), d_out2, float_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 2; ++i) {
+ VERIFY_IS_APPROX(new1(i), std::complex<float>(3.14f, 2.7f));
+ VERIFY_IS_APPROX(new2(i), std::abs(in2(i)));
+ }
+
+ cudaFree(d_in1);
+ cudaFree(d_in2);
+ cudaFree(d_out2);
+}
+
+
+
+void test_cxx11_tensor_complex()
+{
+ CALL_SUBTEST(test_cuda_nullary());
+}
diff --git a/unsupported/test/cxx11_tensor_contract_cuda.cu b/unsupported/test/cxx11_tensor_contract_cuda.cu
index 98ac180ef..767e9c678 100644
--- a/unsupported/test/cxx11_tensor_contract_cuda.cu
+++ b/unsupported/test/cxx11_tensor_contract_cuda.cu
@@ -14,7 +14,9 @@
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
-
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
diff --git a/unsupported/test/cxx11_tensor_contraction.cpp b/unsupported/test/cxx11_tensor_contraction.cpp
index 73623b2ed..ace97057f 100644
--- a/unsupported/test/cxx11_tensor_contraction.cpp
+++ b/unsupported/test/cxx11_tensor_contraction.cpp
@@ -489,6 +489,27 @@ static void test_tensor_product()
}
+template<int DataLayout>
+static void test_const_inputs()
+{
+ Tensor<float, 2, DataLayout> in1(2, 3);
+ Tensor<float, 2, DataLayout> in2(3, 2);
+ in1.setRandom();
+ in2.setRandom();
+
+ TensorMap<Tensor<const float, 2, DataLayout> > mat1(in1.data(), 2, 3);
+ TensorMap<Tensor<const float, 2, DataLayout> > mat2(in2.data(), 3, 2);
+ Tensor<float, 2, DataLayout> mat3(2,2);
+
+ Eigen::array<DimPair, 1> dims = {{DimPair(1, 0)}};
+ mat3 = mat1.contract(mat2, dims);
+
+ VERIFY_IS_APPROX(mat3(0,0), mat1(0,0)*mat2(0,0) + mat1(0,1)*mat2(1,0) + mat1(0,2)*mat2(2,0));
+ VERIFY_IS_APPROX(mat3(0,1), mat1(0,0)*mat2(0,1) + mat1(0,1)*mat2(1,1) + mat1(0,2)*mat2(2,1));
+ VERIFY_IS_APPROX(mat3(1,0), mat1(1,0)*mat2(0,0) + mat1(1,1)*mat2(1,0) + mat1(1,2)*mat2(2,0));
+ VERIFY_IS_APPROX(mat3(1,1), mat1(1,0)*mat2(0,1) + mat1(1,1)*mat2(1,1) + mat1(1,2)*mat2(2,1));
+}
+
void test_cxx11_tensor_contraction()
{
CALL_SUBTEST(test_evals<ColMajor>());
@@ -519,4 +540,6 @@ void test_cxx11_tensor_contraction()
CALL_SUBTEST(test_small_blocking_factors<RowMajor>());
CALL_SUBTEST(test_tensor_product<ColMajor>());
CALL_SUBTEST(test_tensor_product<RowMajor>());
+ CALL_SUBTEST(test_const_inputs<ColMajor>());
+ CALL_SUBTEST(test_const_inputs<RowMajor>());
}
diff --git a/unsupported/test/cxx11_tensor_cuda.cu b/unsupported/test/cxx11_tensor_cuda.cu
index 284b46803..bf216587a 100644
--- a/unsupported/test/cxx11_tensor_cuda.cu
+++ b/unsupported/test/cxx11_tensor_cuda.cu
@@ -10,19 +10,65 @@
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_TEST_FUNC cxx11_tensor_cuda
-#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
-
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::Tensor;
+void test_cuda_nullary() {
+ Tensor<float, 1, 0, int> in1(2);
+ Tensor<float, 1, 0, int> in2(2);
+ in1.setRandom();
+ in2.setRandom();
+
+ std::size_t tensor_bytes = in1.size() * sizeof(float);
+
+ float* d_in1;
+ float* d_in2;
+ cudaMalloc((void**)(&d_in1), tensor_bytes);
+ cudaMalloc((void**)(&d_in2), tensor_bytes);
+ cudaMemcpy(d_in1, in1.data(), tensor_bytes, cudaMemcpyHostToDevice);
+ cudaMemcpy(d_in2, in2.data(), tensor_bytes, cudaMemcpyHostToDevice);
+
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_in1(
+ d_in1, 2);
+ Eigen::TensorMap<Eigen::Tensor<float, 1, 0, int>, Eigen::Aligned> gpu_in2(
+ d_in2, 2);
+
+ gpu_in1.device(gpu_device) = gpu_in1.constant(3.14f);
+ gpu_in2.device(gpu_device) = gpu_in2.random();
+
+ Tensor<float, 1, 0, int> new1(2);
+ Tensor<float, 1, 0, int> new2(2);
+
+ assert(cudaMemcpyAsync(new1.data(), d_in1, tensor_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+ assert(cudaMemcpyAsync(new2.data(), d_in2, tensor_bytes, cudaMemcpyDeviceToHost,
+ gpu_device.stream()) == cudaSuccess);
+
+ assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
+
+ for (int i = 0; i < 2; ++i) {
+ VERIFY_IS_APPROX(new1(i), 3.14f);
+ VERIFY_IS_NOT_EQUAL(new2(i), in2(i));
+ }
+
+ cudaFree(d_in1);
+ cudaFree(d_in2);
+}
+
void test_cuda_elementwise_small() {
- Tensor<float, 1> in1(Eigen::array<int, 1>(2));
- Tensor<float, 1> in2(Eigen::array<int, 1>(2));
- Tensor<float, 1> out(Eigen::array<int, 1>(2));
+ Tensor<float, 1> in1(Eigen::array<Eigen::DenseIndex, 1>(2));
+ Tensor<float, 1> in2(Eigen::array<Eigen::DenseIndex, 1>(2));
+ Tensor<float, 1> out(Eigen::array<Eigen::DenseIndex, 1>(2));
in1.setRandom();
in2.setRandom();
@@ -44,11 +90,11 @@ void test_cuda_elementwise_small() {
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in1(
- d_in1, Eigen::array<int, 1>(2));
+ d_in1, Eigen::array<Eigen::DenseIndex, 1>(2));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_in2(
- d_in2, Eigen::array<int, 1>(2));
+ d_in2, Eigen::array<Eigen::DenseIndex, 1>(2));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_out(
- d_out, Eigen::array<int, 1>(2));
+ d_out, Eigen::array<Eigen::DenseIndex, 1>(2));
gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;
@@ -58,8 +104,8 @@ void test_cuda_elementwise_small() {
for (int i = 0; i < 2; ++i) {
VERIFY_IS_APPROX(
- out(Eigen::array<int, 1>(i)),
- in1(Eigen::array<int, 1>(i)) + in2(Eigen::array<int, 1>(i)));
+ out(Eigen::array<Eigen::DenseIndex, 1>(i)),
+ in1(Eigen::array<Eigen::DenseIndex, 1>(i)) + in2(Eigen::array<Eigen::DenseIndex, 1>(i)));
}
cudaFree(d_in1);
@@ -69,10 +115,10 @@ void test_cuda_elementwise_small() {
void test_cuda_elementwise()
{
- Tensor<float, 3> in1(Eigen::array<int, 3>(72,53,97));
- Tensor<float, 3> in2(Eigen::array<int, 3>(72,53,97));
- Tensor<float, 3> in3(Eigen::array<int, 3>(72,53,97));
- Tensor<float, 3> out(Eigen::array<int, 3>(72,53,97));
+ Tensor<float, 3> in1(Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ Tensor<float, 3> in2(Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ Tensor<float, 3> in3(Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ Tensor<float, 3> out(Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
in1.setRandom();
in2.setRandom();
in3.setRandom();
@@ -98,10 +144,10 @@ void test_cuda_elementwise()
Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
- Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, Eigen::array<int, 3>(72,53,97));
- Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, Eigen::array<int, 3>(72,53,97));
- Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in3(d_in3, Eigen::array<int, 3>(72,53,97));
- Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<int, 3>(72,53,97));
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in1(d_in1, Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in2(d_in2, Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_in3(d_in3, Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
+ Eigen::TensorMap<Eigen::Tensor<float, 3> > gpu_out(d_out, Eigen::array<Eigen::DenseIndex, 3>(72,53,97));
gpu_out.device(gpu_device) = gpu_in1 + gpu_in2 * gpu_in3;
@@ -111,7 +157,7 @@ void test_cuda_elementwise()
for (int i = 0; i < 72; ++i) {
for (int j = 0; j < 53; ++j) {
for (int k = 0; k < 97; ++k) {
- VERIFY_IS_APPROX(out(Eigen::array<int, 3>(i,j,k)), in1(Eigen::array<int, 3>(i,j,k)) + in2(Eigen::array<int, 3>(i,j,k)) * in3(Eigen::array<int, 3>(i,j,k)));
+ VERIFY_IS_APPROX(out(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)), in1(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)) + in2(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)) * in3(Eigen::array<Eigen::DenseIndex, 3>(i,j,k)));
}
}
}
@@ -181,7 +227,7 @@ void test_cuda_reduction()
Eigen::TensorMap<Eigen::Tensor<float, 4> > gpu_in1(d_in1, 72,53,97,113);
Eigen::TensorMap<Eigen::Tensor<float, 2> > gpu_out(d_out, 72,97);
- array<int, 2> reduction_axis;
+ array<Eigen::DenseIndex, 2> reduction_axis;
reduction_axis[0] = 1;
reduction_axis[1] = 3;
@@ -214,8 +260,8 @@ void test_cuda_contraction()
// more than 30 * 1024, which is the number of threads in blocks on
// a 15 SM GK110 GPU
Tensor<float, 4, DataLayout> t_left(6, 50, 3, 31);
- Tensor<float, 5, DataLayout> t_right(Eigen::array<int, 5>(3, 31, 7, 20, 1));
- Tensor<float, 5, DataLayout> t_result(Eigen::array<int, 5>(6, 50, 7, 20, 1));
+ Tensor<float, 5, DataLayout> t_right(Eigen::array<Eigen::DenseIndex, 5>(3, 31, 7, 20, 1));
+ Tensor<float, 5, DataLayout> t_result(Eigen::array<Eigen::DenseIndex, 5>(6, 50, 7, 20, 1));
t_left.setRandom();
t_right.setRandom();
@@ -299,7 +345,7 @@ void test_cuda_convolution_1d()
Eigen::TensorMap<Eigen::Tensor<float, 1, DataLayout> > gpu_kernel(d_kernel, 4);
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out, 74,34,11,137);
- Eigen::array<int, 1> dims(1);
+ Eigen::array<Eigen::DenseIndex, 1> dims(1);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
@@ -352,7 +398,7 @@ void test_cuda_convolution_inner_dim_col_major_1d()
Eigen::TensorMap<Eigen::Tensor<float, 1, ColMajor> > gpu_kernel(d_kernel,4);
Eigen::TensorMap<Eigen::Tensor<float, 4, ColMajor> > gpu_out(d_out,71,9,11,7);
- Eigen::array<int, 1> dims(0);
+ Eigen::array<Eigen::DenseIndex, 1> dims(0);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
@@ -405,7 +451,7 @@ void test_cuda_convolution_inner_dim_row_major_1d()
Eigen::TensorMap<Eigen::Tensor<float, 1, RowMajor> > gpu_kernel(d_kernel, 4);
Eigen::TensorMap<Eigen::Tensor<float, 4, RowMajor> > gpu_out(d_out, 7,9,11,71);
- Eigen::array<int, 1> dims(3);
+ Eigen::array<Eigen::DenseIndex, 1> dims(3);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
@@ -459,7 +505,7 @@ void test_cuda_convolution_2d()
Eigen::TensorMap<Eigen::Tensor<float, 2, DataLayout> > gpu_kernel(d_kernel,3,4);
Eigen::TensorMap<Eigen::Tensor<float, 4, DataLayout> > gpu_out(d_out,74,35,8,137);
- Eigen::array<int, 2> dims(1,2);
+ Eigen::array<Eigen::DenseIndex, 2> dims(1,2);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
@@ -496,9 +542,9 @@ void test_cuda_convolution_2d()
template<int DataLayout>
void test_cuda_convolution_3d()
{
- Tensor<float, 5, DataLayout> input(Eigen::array<int, 5>(74,37,11,137,17));
+ Tensor<float, 5, DataLayout> input(Eigen::array<Eigen::DenseIndex, 5>(74,37,11,137,17));
Tensor<float, 3, DataLayout> kernel(3,4,2);
- Tensor<float, 5, DataLayout> out(Eigen::array<int, 5>(74,35,8,136,17));
+ Tensor<float, 5, DataLayout> out(Eigen::array<Eigen::DenseIndex, 5>(74,35,8,136,17));
input = input.constant(10.0f) + input.random();
kernel = kernel.constant(7.0f) + kernel.random();
@@ -523,7 +569,7 @@ void test_cuda_convolution_3d()
Eigen::TensorMap<Eigen::Tensor<float, 3, DataLayout> > gpu_kernel(d_kernel,3,4,2);
Eigen::TensorMap<Eigen::Tensor<float, 5, DataLayout> > gpu_out(d_out,74,35,8,136,17);
- Eigen::array<int, 3> dims(1,2,3);
+ Eigen::array<Eigen::DenseIndex, 3> dims(1,2,3);
gpu_out.device(gpu_device) = gpu_input.convolve(gpu_kernel, dims);
assert(cudaMemcpyAsync(out.data(), d_out, out_bytes, cudaMemcpyDeviceToHost, gpu_device.stream()) == cudaSuccess);
@@ -1168,6 +1214,7 @@ void test_cuda_betainc()
void test_cxx11_tensor_cuda()
{
+ CALL_SUBTEST_1(test_cuda_nullary());
CALL_SUBTEST_1(test_cuda_elementwise_small());
CALL_SUBTEST_1(test_cuda_elementwise());
CALL_SUBTEST_1(test_cuda_props());
diff --git a/unsupported/test/cxx11_tensor_device.cu b/unsupported/test/cxx11_tensor_device.cu
index b6ca54d93..fde20ddf2 100644
--- a/unsupported/test/cxx11_tensor_device.cu
+++ b/unsupported/test/cxx11_tensor_device.cu
@@ -13,7 +13,9 @@
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
-
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
index 2f55f9361..cbf401c86 100644
--- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu
+++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
@@ -13,7 +13,9 @@
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
-
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
@@ -181,30 +183,39 @@ void test_cuda_trancendental() {
float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ float* d_float3 = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::half* d_res1_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
Eigen::half* d_res1_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
Eigen::half* d_res2_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
Eigen::half* d_res2_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
-
- Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(
- d_float1, num_elem);
- Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(
- d_float2, num_elem);
- Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_half(
- d_res1_half, num_elem);
- Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_float(
- d_res1_float, num_elem);
- Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_half(
- d_res2_half, num_elem);
- Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_float(
- d_res2_float, num_elem);
+ Eigen::half* d_res3_half = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+ Eigen::half* d_res3_float = (Eigen::half*)gpu_device.allocate(num_elem * sizeof(Eigen::half));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(d_float1, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(d_float2, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float3(d_float3, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_half(d_res1_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res1_float(d_res1_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_half(d_res2_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res2_float(d_res2_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_half(d_res3_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<Eigen::half, 1>, Eigen::Aligned> gpu_res3_float(d_res3_float, num_elem);
gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
gpu_float2.device(gpu_device) = gpu_float2.random() + gpu_float1.constant(0.5f);
+ gpu_float3.device(gpu_device) = gpu_float3.random();
gpu_res1_float.device(gpu_device) = gpu_float1.exp().cast<Eigen::half>();
gpu_res2_float.device(gpu_device) = gpu_float2.log().cast<Eigen::half>();
- gpu_res1_half.device(gpu_device) = gpu_float1.cast<Eigen::half>().exp();
- gpu_res2_half.device(gpu_device) = gpu_float2.cast<Eigen::half>().log();
+ gpu_res3_float.device(gpu_device) = gpu_float3.log1p().cast<Eigen::half>();
+
+ gpu_res1_half.device(gpu_device) = gpu_float1.cast<Eigen::half>();
+ gpu_res1_half.device(gpu_device) = gpu_res1_half.exp();
+
+ gpu_res2_half.device(gpu_device) = gpu_float2.cast<Eigen::half>();
+ gpu_res2_half.device(gpu_device) = gpu_res2_half.log();
+
+ gpu_res3_half.device(gpu_device) = gpu_float3.cast<Eigen::half>();
+ gpu_res3_half.device(gpu_device) = gpu_res3_half.log1p();
Tensor<float, 1> input1(num_elem);
Tensor<Eigen::half, 1> half_prec1(num_elem);
@@ -212,12 +223,18 @@ void test_cuda_trancendental() {
Tensor<float, 1> input2(num_elem);
Tensor<Eigen::half, 1> half_prec2(num_elem);
Tensor<Eigen::half, 1> full_prec2(num_elem);
+ Tensor<float, 1> input3(num_elem);
+ Tensor<Eigen::half, 1> half_prec3(num_elem);
+ Tensor<Eigen::half, 1> full_prec3(num_elem);
gpu_device.memcpyDeviceToHost(input1.data(), d_float1, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(input2.data(), d_float2, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(input3.data(), d_float3, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(half_prec1.data(), d_res1_half, num_elem*sizeof(Eigen::half));
gpu_device.memcpyDeviceToHost(full_prec1.data(), d_res1_float, num_elem*sizeof(Eigen::half));
gpu_device.memcpyDeviceToHost(half_prec2.data(), d_res2_half, num_elem*sizeof(Eigen::half));
gpu_device.memcpyDeviceToHost(full_prec2.data(), d_res2_float, num_elem*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(half_prec3.data(), d_res3_half, num_elem*sizeof(Eigen::half));
+ gpu_device.memcpyDeviceToHost(full_prec3.data(), d_res3_float, num_elem*sizeof(Eigen::half));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
@@ -231,12 +248,19 @@ void test_cuda_trancendental() {
else
VERIFY_IS_APPROX(full_prec2(i), half_prec2(i));
}
+ for (int i = 0; i < num_elem; ++i) {
+ std::cout << "Checking elemwise plog1 " << i << " input = " << input3(i) << " full = " << full_prec3(i) << " half = " << half_prec3(i) << std::endl;
+ VERIFY_IS_APPROX(full_prec3(i), half_prec3(i));
+ }
gpu_device.deallocate(d_float1);
gpu_device.deallocate(d_float2);
+ gpu_device.deallocate(d_float3);
gpu_device.deallocate(d_res1_half);
gpu_device.deallocate(d_res1_float);
gpu_device.deallocate(d_res2_half);
gpu_device.deallocate(d_res2_float);
+ gpu_device.deallocate(d_res3_float);
+ gpu_device.deallocate(d_res3_half);
}
template<typename>
diff --git a/unsupported/test/cxx11_tensor_random_cuda.cu b/unsupported/test/cxx11_tensor_random_cuda.cu
index fa1a46732..b3be199e1 100644
--- a/unsupported/test/cxx11_tensor_random_cuda.cu
+++ b/unsupported/test/cxx11_tensor_random_cuda.cu
@@ -13,6 +13,9 @@
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
#include "main.h"
#include <Eigen/CXX11/Tensor>
diff --git a/unsupported/test/cxx11_tensor_reduction_cuda.cu b/unsupported/test/cxx11_tensor_reduction_cuda.cu
index cad0c08e0..6858b43a7 100644
--- a/unsupported/test/cxx11_tensor_reduction_cuda.cu
+++ b/unsupported/test/cxx11_tensor_reduction_cuda.cu
@@ -12,11 +12,14 @@
#define EIGEN_TEST_FUNC cxx11_tensor_reduction_cuda
#define EIGEN_USE_GPU
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
-template<int DataLayout>
+template<typename Type, int DataLayout>
static void test_full_reductions() {
Eigen::CudaStreamDevice stream;
@@ -25,24 +28,24 @@ static void test_full_reductions() {
const int num_rows = internal::random<int>(1024, 5*1024);
const int num_cols = internal::random<int>(1024, 5*1024);
- Tensor<float, 2, DataLayout> in(num_rows, num_cols);
+ Tensor<Type, 2, DataLayout> in(num_rows, num_cols);
in.setRandom();
- Tensor<float, 0, DataLayout> full_redux;
+ Tensor<Type, 0, DataLayout> full_redux;
full_redux = in.sum();
- std::size_t in_bytes = in.size() * sizeof(float);
- std::size_t out_bytes = full_redux.size() * sizeof(float);
- float* gpu_in_ptr = static_cast<float*>(gpu_device.allocate(in_bytes));
- float* gpu_out_ptr = static_cast<float*>(gpu_device.allocate(out_bytes));
+ std::size_t in_bytes = in.size() * sizeof(Type);
+ std::size_t out_bytes = full_redux.size() * sizeof(Type);
+ Type* gpu_in_ptr = static_cast<Type*>(gpu_device.allocate(in_bytes));
+ Type* gpu_out_ptr = static_cast<Type*>(gpu_device.allocate(out_bytes));
gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes);
- TensorMap<Tensor<float, 2, DataLayout> > in_gpu(gpu_in_ptr, num_rows, num_cols);
- TensorMap<Tensor<float, 0, DataLayout> > out_gpu(gpu_out_ptr);
+ TensorMap<Tensor<Type, 2, DataLayout> > in_gpu(gpu_in_ptr, num_rows, num_cols);
+ TensorMap<Tensor<Type, 0, DataLayout> > out_gpu(gpu_out_ptr);
out_gpu.device(gpu_device) = in_gpu.sum();
- Tensor<float, 0, DataLayout> full_redux_gpu;
+ Tensor<Type, 0, DataLayout> full_redux_gpu;
gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes);
gpu_device.synchronize();
@@ -53,7 +56,102 @@ static void test_full_reductions() {
gpu_device.deallocate(gpu_out_ptr);
}
+template<typename Type, int DataLayout>
+static void test_first_dim_reductions() {
+ int dim_x = 33;
+ int dim_y = 1;
+ int dim_z = 128;
+
+ Tensor<Type, 3, DataLayout> in(dim_x, dim_y, dim_z);
+ in.setRandom();
+
+ Eigen::array<int, 1> red_axis;
+ red_axis[0] = 0;
+ Tensor<Type, 2, DataLayout> redux = in.sum(red_axis);
+
+ // Create device
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice dev(&stream);
+
+ // Create data
+ Type* in_data = (Type*)dev.allocate(dim_x*dim_y*dim_z*sizeof(Type));
+ Type* out_data = (Type*)dev.allocate(dim_z*dim_y*sizeof(Type));
+ Eigen::TensorMap<Eigen::Tensor<Type, 3, DataLayout> > gpu_in(in_data, dim_x, dim_y, dim_z);
+ Eigen::TensorMap<Eigen::Tensor<Type, 2, DataLayout> > gpu_out(out_data, dim_y, dim_z);
+
+ // Perform operation
+ dev.memcpyHostToDevice(in_data, in.data(), in.size()*sizeof(Type));
+ gpu_out.device(dev) = gpu_in.sum(red_axis);
+ gpu_out.device(dev) += gpu_in.sum(red_axis);
+ Tensor<Type, 2, DataLayout> redux_gpu(dim_y, dim_z);
+ dev.memcpyDeviceToHost(redux_gpu.data(), out_data, gpu_out.size()*sizeof(Type));
+ dev.synchronize();
+
+ // Check that the CPU and GPU reductions return the same result.
+ for (int i = 0; i < gpu_out.size(); ++i) {
+ VERIFY_IS_APPROX(2*redux(i), redux_gpu(i));
+ }
+
+ dev.deallocate(in_data);
+ dev.deallocate(out_data);
+}
+
+template<typename Type, int DataLayout>
+static void test_last_dim_reductions() {
+ int dim_x = 128;
+ int dim_y = 1;
+ int dim_z = 33;
+
+ Tensor<Type, 3, DataLayout> in(dim_x, dim_y, dim_z);
+ in.setRandom();
+
+ Eigen::array<int, 1> red_axis;
+ red_axis[0] = 2;
+ Tensor<Type, 2, DataLayout> redux = in.sum(red_axis);
+
+ // Create device
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice dev(&stream);
+
+ // Create data
+ Type* in_data = (Type*)dev.allocate(dim_x*dim_y*dim_z*sizeof(Type));
+ Type* out_data = (Type*)dev.allocate(dim_x*dim_y*sizeof(Type));
+ Eigen::TensorMap<Eigen::Tensor<Type, 3, DataLayout> > gpu_in(in_data, dim_x, dim_y, dim_z);
+ Eigen::TensorMap<Eigen::Tensor<Type, 2, DataLayout> > gpu_out(out_data, dim_x, dim_y);
+
+ // Perform operation
+ dev.memcpyHostToDevice(in_data, in.data(), in.size()*sizeof(Type));
+ gpu_out.device(dev) = gpu_in.sum(red_axis);
+ gpu_out.device(dev) += gpu_in.sum(red_axis);
+ Tensor<Type, 2, DataLayout> redux_gpu(dim_x, dim_y);
+ dev.memcpyDeviceToHost(redux_gpu.data(), out_data, gpu_out.size()*sizeof(Type));
+ dev.synchronize();
+
+ // Check that the CPU and GPU reductions return the same result.
+ for (int i = 0; i < gpu_out.size(); ++i) {
+ VERIFY_IS_APPROX(2*redux(i), redux_gpu(i));
+ }
+
+ dev.deallocate(in_data);
+ dev.deallocate(out_data);
+}
+
+
void test_cxx11_tensor_reduction_cuda() {
- CALL_SUBTEST_1(test_full_reductions<ColMajor>());
- CALL_SUBTEST_2(test_full_reductions<RowMajor>());
+ CALL_SUBTEST_1((test_full_reductions<float, ColMajor>()));
+ CALL_SUBTEST_1((test_full_reductions<double, ColMajor>()));
+ CALL_SUBTEST_2((test_full_reductions<float, RowMajor>()));
+ CALL_SUBTEST_2((test_full_reductions<double, RowMajor>()));
+
+ CALL_SUBTEST_3((test_first_dim_reductions<float, ColMajor>()));
+ CALL_SUBTEST_3((test_first_dim_reductions<double, ColMajor>()));
+ CALL_SUBTEST_4((test_first_dim_reductions<float, RowMajor>()));
+// Outer reductions of doubles aren't supported just yet.
+// CALL_SUBTEST_4((test_first_dim_reductions<double, RowMajor>()))
+
+ CALL_SUBTEST_5((test_last_dim_reductions<float, ColMajor>()));
+// Outer reductions of doubles aren't supported just yet.
+// CALL_SUBTEST_5((test_last_dim_reductions<double, ColMajor>()));
+ CALL_SUBTEST_6((test_last_dim_reductions<float, RowMajor>()));
+ CALL_SUBTEST_6((test_last_dim_reductions<double, RowMajor>()));
}
diff --git a/unsupported/test/cxx11_tensor_scan_cuda.cu b/unsupported/test/cxx11_tensor_scan_cuda.cu
index 35e19e51c..761d11fd1 100644
--- a/unsupported/test/cxx11_tensor_scan_cuda.cu
+++ b/unsupported/test/cxx11_tensor_scan_cuda.cu
@@ -13,7 +13,9 @@
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
-
+#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
+#include <cuda_fp16.h>
+#endif
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
diff --git a/unsupported/test/kronecker_product.cpp b/unsupported/test/kronecker_product.cpp
index 02411a262..e770049e5 100644
--- a/unsupported/test/kronecker_product.cpp
+++ b/unsupported/test/kronecker_product.cpp
@@ -9,12 +9,12 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#ifdef EIGEN_TEST_PART_1
#include "sparse.h"
#include <Eigen/SparseExtra>
#include <Eigen/KroneckerProduct>
-
template<typename MatrixType>
void check_dimension(const MatrixType& ab, const int rows, const int cols)
{
@@ -230,3 +230,23 @@ void test_kronecker_product()
VERIFY_IS_APPROX(MatrixXf(sC2),dC);
}
}
+
+#endif
+
+#ifdef EIGEN_TEST_PART_2
+
+// Simply check that, for a dense Kronecker product, the sparse module is not needed
+
+#include "main.h"
+#include <Eigen/KroneckerProduct>
+
+void test_kronecker_product()
+{
+ MatrixXd a(2,2), b(3,3), c;
+ a.setRandom();
+ b.setRandom();
+ c = kroneckerProduct(a,b);
+ VERIFY_IS_APPROX(c.block(3,3,3,3), a(1,1)*b);
+}
+
+#endif