author    Igor Babuschkin <igor@babuschk.in>    2016-08-05 14:34:57 +0100
committer Igor Babuschkin <igor@babuschk.in>    2016-08-05 14:34:57 +0100
commit 0425118e2a1653aa1eecbfeccb244760222cd69c (patch)
tree 224976b67ccb458cfb10a23592d4c8e7e7f98257 /unsupported
parent 9537e8b118062191b0b3f7ac58fb237dbbc6587b (diff)
parent fe4b927e9c8e796a07c5864e58630e888979519e (diff)
Merge upstream changes
Diffstat (limited to 'unsupported')
-rw-r--r--  unsupported/Eigen/CMakeLists.txt | 1
-rw-r--r--  unsupported/Eigen/CXX11/Tensor | 1
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 18
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h | 4
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h | 74
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | 12
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h | 2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h | 2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h | 1
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h | 8
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h | 2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorIO.h | 5
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h | 50
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h | 26
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorScan.h | 2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h | 3
-rw-r--r--  unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h | 4
-rw-r--r--  unsupported/Eigen/MPRealSupport | 29
-rw-r--r--  unsupported/Eigen/SpecialFunctions | 61
-rwxr-xr-x  unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h | 37
-rw-r--r--  unsupported/Eigen/src/CMakeLists.txt | 1
-rw-r--r--  unsupported/Eigen/src/SpecialFunctions/CMakeLists.txt | 11
-rw-r--r--  unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h | 124
-rw-r--r--  unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h | 236
-rw-r--r--  unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h | 47
-rw-r--r--  unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h | 1551
-rw-r--r--  unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h | 58
-rw-r--r--  unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h | 165
-rw-r--r--  unsupported/Eigen/src/Splines/Spline.h | 2
-rw-r--r--  unsupported/test/CMakeLists.txt | 13
-rw-r--r--  unsupported/test/FFTW.cpp | 32
-rw-r--r--  unsupported/test/autodiff.cpp | 32
-rw-r--r--  unsupported/test/cxx11_float16.cpp | 203
-rw-r--r--  unsupported/test/cxx11_tensor_dimension.cpp | 16
-rw-r--r--  unsupported/test/cxx11_tensor_morphing.cpp | 48
-rw-r--r--  unsupported/test/cxx11_tensor_of_float16_cuda.cu | 87
-rw-r--r--  unsupported/test/cxx11_tensor_reduction.cpp | 29
-rw-r--r--  unsupported/test/mpreal_support.cpp | 1
-rw-r--r--  unsupported/test/special_functions.cpp | 345
39 files changed, 2951 insertions(+), 392 deletions(-)
diff --git a/unsupported/Eigen/CMakeLists.txt b/unsupported/Eigen/CMakeLists.txt
index 6d0cf4f9d..7478b6b0d 100644
--- a/unsupported/Eigen/CMakeLists.txt
+++ b/unsupported/Eigen/CMakeLists.txt
@@ -17,6 +17,7 @@ set(Eigen_HEADERS
Polynomials
Skyline
SparseExtra
+ SpecialFunctions
Splines
)
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor
index 79bac2f67..f7b94cee1 100644
--- a/unsupported/Eigen/CXX11/Tensor
+++ b/unsupported/Eigen/CXX11/Tensor
@@ -15,6 +15,7 @@
#include <Eigen/src/Core/util/DisableStupidWarnings.h>
+#include "../SpecialFunctions"
#include "src/util/CXX11Meta.h"
#include "src/util/MaxSizeVector.h"
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 73bfac40e..19d2b50b5 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -210,6 +210,18 @@ class TensorBase<Derived, ReadOnlyAccessors>
}
EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_real_op<Scalar>, const Derived>
+ real() const {
+ return unaryExpr(internal::scalar_real_op<Scalar>());
+ }
+
+ EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_imag_op<Scalar>, const Derived>
+ imag() const {
+ return unaryExpr(internal::scalar_imag_op<Scalar>());
+ }
+
+ EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_sum_op<Scalar,Scalar> >, const Derived>
operator+ (Scalar rhs) const {
return unaryExpr(internal::bind2nd_op<internal::scalar_sum_op<Scalar,Scalar> >(rhs));
@@ -805,8 +817,8 @@ class TensorBase<Derived, ReadOnlyAccessors>
EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); }
};
-template<typename Derived>
-class TensorBase<Derived, WriteAccessors> : public TensorBase<Derived, ReadOnlyAccessors> {
+template<typename Derived, int AccessLevel = internal::accessors_level<Derived>::value>
+class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
public:
typedef internal::traits<Derived> DerivedTraits;
typedef typename DerivedTraits::Scalar Scalar;
@@ -816,7 +828,7 @@ class TensorBase<Derived, WriteAccessors> : public TensorBase<Derived, ReadOnlyA
template <typename Scalar, int NumIndices, int Options, typename IndexType> friend class Tensor;
template <typename Scalar, typename Dimensions, int Option, typename IndexTypes> friend class TensorFixedSize;
- template <typename OtherDerived, int AccessLevel> friend class TensorBase;
+ template <typename OtherDerived, int OtherAccessLevel> friend class TensorBase;
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE Derived& setZero() {
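The new real() and imag() methods mirror the Array API on tensors of complex scalars. A minimal sketch of how they would be used (tensor shape and values here are illustrative, not from the patch):

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <complex>

    int main() {
      Eigen::Tensor<std::complex<float>, 1> t(3);
      t(0) = std::complex<float>(1.f, 2.f);
      t(1) = std::complex<float>(3.f, 4.f);
      t(2) = std::complex<float>(5.f, 6.f);
      Eigen::Tensor<float, 1> re = t.real();  // coefficient-wise real parts
      Eigen::Tensor<float, 1> im = t.imag();  // coefficient-wise imaginary parts
      return 0;
    }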
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
index 886474986..d65dbb40f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
@@ -461,8 +461,8 @@ EigenContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
#undef writeResultShmem
#undef writeRow
- const int max_i_write = (min)((int)((m_size - base_m - threadIdx.y + 7) / 8), 8);
- const int max_j_write = (min)((int)((n_size - base_n - threadIdx.z + 7) / 8), 8);
+ const int max_i_write = numext::mini((int)((m_size - base_m - threadIdx.y + 7) / 8), 8);
+ const int max_j_write = numext::mini((int)((n_size - base_n - threadIdx.z + 7) / 8), 8);
if (threadIdx.x < max_i_write) {
if (max_j_write == 8) {
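numext::mini is the device-safe counterpart of std::min: it is marked EIGEN_DEVICE_FUNC, so it can be called from CUDA kernel code where the std:: version may fail to compile. A trivial sketch of the idiom (host-side here for brevity):

    #include <Eigen/Core>

    int clamp_to_eight(int n) {
      // Same pattern as the kernel code above: numext::mini compiles
      // on both host and device.
      return Eigen::numext::mini(n, 8);
    }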
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h
index b27e1a1b4..9b2cb3ff6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h
@@ -130,19 +130,19 @@ class SimpleTensorContractionMapper {
}
Index contract_val = left ? col : row;
- for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
- const Index idx = contract_val / m_k_strides[i];
- linidx += idx * m_contract_strides[i];
- contract_val -= idx * m_k_strides[i];
- }
-
if(array_size<contract_t>::value > 0) {
- if (side == Rhs && inner_dim_contiguous) {
- eigen_assert(m_contract_strides[0] == 1);
- linidx += contract_val;
- } else {
- linidx += contract_val * m_contract_strides[0];
- }
+ for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
+ const Index idx = contract_val / m_k_strides[i];
+ linidx += idx * m_contract_strides[i];
+ contract_val -= idx * m_k_strides[i];
+ }
+
+ if (side == Rhs && inner_dim_contiguous) {
+ eigen_assert(m_contract_strides[0] == 1);
+ linidx += contract_val;
+ } else {
+ linidx += contract_val * m_contract_strides[0];
+ }
}
return linidx;
@@ -153,15 +153,15 @@ class SimpleTensorContractionMapper {
const bool left = (side == Lhs);
Index nocontract_val[2] = {left ? row : col, left ? row + distance : col};
Index linidx[2] = {0, 0};
- for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) {
- const Index idx0 = nocontract_val[0] / m_ij_strides[i];
- const Index idx1 = nocontract_val[1] / m_ij_strides[i];
- linidx[0] += idx0 * m_nocontract_strides[i];
- linidx[1] += idx1 * m_nocontract_strides[i];
- nocontract_val[0] -= idx0 * m_ij_strides[i];
- nocontract_val[1] -= idx1 * m_ij_strides[i];
- }
if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) {
+ for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) {
+ const Index idx0 = nocontract_val[0] / m_ij_strides[i];
+ const Index idx1 = nocontract_val[1] / m_ij_strides[i];
+ linidx[0] += idx0 * m_nocontract_strides[i];
+ linidx[1] += idx1 * m_nocontract_strides[i];
+ nocontract_val[0] -= idx0 * m_ij_strides[i];
+ nocontract_val[1] -= idx1 * m_ij_strides[i];
+ }
if (side == Lhs && inner_dim_contiguous) {
eigen_assert(m_nocontract_strides[0] == 1);
linidx[0] += nocontract_val[0];
@@ -173,22 +173,24 @@ class SimpleTensorContractionMapper {
}
Index contract_val[2] = {left ? col : row, left ? col : row + distance};
- for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
- const Index idx0 = contract_val[0] / m_k_strides[i];
- const Index idx1 = contract_val[1] / m_k_strides[i];
- linidx[0] += idx0 * m_contract_strides[i];
- linidx[1] += idx1 * m_contract_strides[i];
- contract_val[0] -= idx0 * m_k_strides[i];
- contract_val[1] -= idx1 * m_k_strides[i];
- }
+ if (array_size<contract_t>::value > 0) {
+ for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
+ const Index idx0 = contract_val[0] / m_k_strides[i];
+ const Index idx1 = contract_val[1] / m_k_strides[i];
+ linidx[0] += idx0 * m_contract_strides[i];
+ linidx[1] += idx1 * m_contract_strides[i];
+ contract_val[0] -= idx0 * m_k_strides[i];
+ contract_val[1] -= idx1 * m_k_strides[i];
+ }
- if (side == Rhs && inner_dim_contiguous) {
- eigen_assert(m_contract_strides[0] == 1);
- linidx[0] += contract_val[0];
- linidx[1] += contract_val[1];
- } else {
- linidx[0] += contract_val[0] * m_contract_strides[0];
- linidx[1] += contract_val[1] * m_contract_strides[0];
+ if (side == Rhs && inner_dim_contiguous) {
+ eigen_assert(m_contract_strides[0] == 1);
+ linidx[0] += contract_val[0];
+ linidx[1] += contract_val[1];
+ } else {
+ linidx[0] += contract_val[0] * m_contract_strides[0];
+ linidx[1] += contract_val[1] * m_contract_strides[0];
+ }
}
return IndexPair<Index>(linidx[0], linidx[1]);
}
@@ -200,7 +202,7 @@ class SimpleTensorContractionMapper {
return (Alignment == Aligned) && (side == Lhs) && inner_dim_contiguous ? 0 : size;
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index stride() const {
- return ((side == Lhs) && inner_dim_contiguous) ? m_contract_strides[0] : 1;
+ return ((side == Lhs) && inner_dim_contiguous && array_size<contract_t>::value > 0) ? m_contract_strides[0] : 1;
}
protected:
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index 34270730b..069680a11 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -151,9 +151,7 @@ struct ThreadPoolDevice {
template <class Function, class... Args>
EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... args) const {
Notification* n = new Notification();
- std::function<void()> func =
- std::bind(&FunctionWrapperWithNotification<Function, Args...>::run, n, f, args...);
- pool_->Schedule(func);
+ pool_->Schedule(std::bind(&FunctionWrapperWithNotification<Function, Args...>::run, n, f, args...));
return n;
}
@@ -161,15 +159,13 @@ struct ThreadPoolDevice {
EIGEN_STRONG_INLINE void enqueue_with_barrier(Barrier* b,
Function&& f,
Args&&... args) const {
- std::function<void()> func = std::bind(
- &FunctionWrapperWithBarrier<Function, Args...>::run, b, f, args...);
- pool_->Schedule(func);
+ pool_->Schedule(std::bind(
+ &FunctionWrapperWithBarrier<Function, Args...>::run, b, f, args...));
}
template <class Function, class... Args>
EIGEN_STRONG_INLINE void enqueueNoNotification(Function&& f, Args&&... args) const {
- std::function<void()> func = std::bind(f, args...);
- pool_->Schedule(func);
+ pool_->Schedule(std::bind(f, args...));
}
// Returns a logical thread index between 0 and pool_->NumThreads() - 1 if
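The change above passes the temporary std::bind expression straight to Schedule(), avoiding an intermediate std::function copy. A hedged sketch of how enqueue() is typically consumed (assuming the Eigen ThreadPool types of this era; the worker lambda is illustrative):

    #define EIGEN_USE_THREADS
    #include <unsupported/Eigen/CXX11/Tensor>

    int main() {
      Eigen::ThreadPool pool(4);
      Eigen::ThreadPoolDevice device(&pool, 4);
      Eigen::Notification* done = device.enqueue([] { /* work */ });
      done->Wait();   // block until the task has run
      delete done;
      return 0;
    }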
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
index 26b1f65a8..a08dfa7c3 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
@@ -94,7 +94,7 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
enum {
- IsAligned = true,
+ IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false, // to be implemented
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index a48cb1daa..c2a327bf0 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -131,7 +131,7 @@ double loadConstant(const double* address) {
}
template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
Eigen::half loadConstant(const Eigen::half* address) {
- return Eigen::half(internal::raw_uint16_to_half(__ldg(&address->x)));
+ return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
}
#endif
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index ad5c97b57..a116bf17f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -159,7 +159,6 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable> {
#else
size_t num_threads = device.numThreads();
if (num_threads > 1) {
- cost = evaluator.costPerCoeff(Vectorizable)
num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
size, evaluator.costPerCoeff(Vectorizable), num_threads);
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
index ece2ed91b..08eb5595a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
@@ -329,7 +329,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
for (Index i = 0; i < n; ++i) {
if(FFTDir == FFT_FORWARD) {
- a[i] = data[i] * std::conj(pos_j_base_powered[i]);
+ a[i] = data[i] * numext::conj(pos_j_base_powered[i]);
}
else {
a[i] = data[i] * pos_j_base_powered[i];
@@ -344,7 +344,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
b[i] = pos_j_base_powered[i];
}
else {
- b[i] = std::conj(pos_j_base_powered[i]);
+ b[i] = numext::conj(pos_j_base_powered[i]);
}
}
for (Index i = n; i < m - n; ++i) {
@@ -355,7 +355,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
b[i] = pos_j_base_powered[m-i];
}
else {
- b[i] = std::conj(pos_j_base_powered[m-i]);
+ b[i] = numext::conj(pos_j_base_powered[m-i]);
}
}
@@ -379,7 +379,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
for (Index i = 0; i < n; ++i) {
if(FFTDir == FFT_FORWARD) {
- data[i] = a[i] * std::conj(pos_j_base_powered[i]);
+ data[i] = a[i] * numext::conj(pos_j_base_powered[i]);
}
else {
data[i] = a[i] * pos_j_base_powered[i];
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index f35275ffb..490ddd8bd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -16,7 +16,7 @@ template<typename Scalar_, int NumIndices_, int Options_ = 0, typename IndexType
template<typename Scalar_, typename Dimensions, int Options_ = 0, typename IndexType = DenseIndex> class TensorFixedSize;
template<typename PlainObjectType, int Options_ = Unaligned> class TensorMap;
template<typename PlainObjectType> class TensorRef;
-template<typename Derived, int AccessLevel = internal::accessors_level<Derived>::value> class TensorBase;
+template<typename Derived, int AccessLevel> class TensorBase;
template<typename NullaryOp, typename PlainObjectType> class TensorCwiseNullaryOp;
template<typename UnaryOp, typename XprType> class TensorCwiseUnaryOp;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
index f3a3a1b88..a901c5dd4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
@@ -13,11 +13,6 @@
namespace Eigen {
namespace internal {
-template<>
-struct significant_decimals_impl<std::string>
- : significant_decimals_default_impl<std::string, true>
-{};
-
// Print the tensor as a 2d matrix
template <typename Tensor, int Rank>
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
index 33c6c1b0f..ede3939c2 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
@@ -29,25 +29,47 @@ namespace Eigen {
namespace internal {
namespace {
+
// Note: result is undefined if val == 0
template <typename T>
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int count_leading_zeros(const T val)
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+ typename internal::enable_if<sizeof(T)==4,int>::type count_leading_zeros(const T val)
{
#ifdef __CUDA_ARCH__
- return (sizeof(T) == 8) ? __clzll(val) : __clz(val);
+ return __clz(val);
#elif EIGEN_COMP_MSVC
- unsigned long index;
- if (sizeof(T) == 8) {
- _BitScanReverse64(&index, val);
- } else {
- _BitScanReverse(&index, val);
- }
- return (sizeof(T) == 8) ? 63 - index : 31 - index;
+ unsigned long index;
+ _BitScanReverse(&index, val);
+ return 31 - index;
+#else
+ EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE);
+ return __builtin_clz(static_cast<uint32_t>(val));
+#endif
+ }
+
+ template <typename T>
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+ typename internal::enable_if<sizeof(T)==8,int>::type count_leading_zeros(const T val)
+ {
+#ifdef __CUDA_ARCH__
+ return __clzll(val);
+#elif EIGEN_COMP_MSVC && EIGEN_ARCH_x86_64
+ unsigned long index;
+ _BitScanReverse64(&index, val);
+ return 63 - index;
+#elif EIGEN_COMP_MSVC
+ // MSVC's _BitScanReverse64 is not available for 32-bit builds.
+ unsigned int lo = (unsigned int)(val&0xffffffff);
+ unsigned int hi = (unsigned int)((val>>32)&0xffffffff);
+ int n;
+ if(hi==0)
+ n = 32 + count_leading_zeros<unsigned int>(lo);
+ else
+ n = count_leading_zeros<unsigned int>(hi);
+ return n;
#else
EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE);
- return (sizeof(T) == 8) ?
- __builtin_clzll(static_cast<uint64_t>(val)) :
- __builtin_clz(static_cast<uint32_t>(val));
+ return __builtin_clzll(static_cast<uint64_t>(val));
#endif
}
@@ -98,7 +120,9 @@ namespace {
return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1);
#else
const uint64_t shift = 1ULL << log_div;
- TensorUInt128<uint64_t, uint64_t> result = (TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) - TensorUInt128<static_val<1>, static_val<0> >(1, 0) + TensorUInt128<static_val<0>, static_val<1> >(1));
+ TensorUInt128<uint64_t, uint64_t> result = TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider)
+ - TensorUInt128<static_val<1>, static_val<0> >(1, 0)
+ + TensorUInt128<static_val<0>, static_val<1> >(1);
return static_cast<uint64_t>(result);
#endif
}
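Splitting count_leading_zeros into sizeof-based overloads replaces the old runtime `sizeof(T) == 8` branch with overload resolution, so each instantiation only names the intrinsic it actually needs; that is what makes the MSVC 32-bit build work, since _BitScanReverse64 is never referenced for 4-byte types. A standalone sketch of the enable_if dispatch pattern, independent of the intrinsics (assumes GCC/Clang builtins):

    #include <type_traits>
    #include <cstdint>

    // Selected for 4-byte integer types only.
    template <typename T>
    typename std::enable_if<sizeof(T) == 4, int>::type clz(T v) {
      return __builtin_clz(static_cast<uint32_t>(v));
    }

    // Selected for 8-byte integer types only.
    template <typename T>
    typename std::enable_if<sizeof(T) == 8, int>::type clz(T v) {
      return __builtin_clzll(static_cast<uint64_t>(v));
    }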
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 04ba45a8f..0e1576954 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -264,7 +264,7 @@ struct FullReducer<Self, Op, ThreadPoolDevice, Vectorizable> {
const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0;
eigen_assert(num_coeffs >= numblocks * blocksize);
- Barrier barrier(numblocks);
+ Barrier barrier(internal::convert_index<unsigned int>(numblocks));
MaxSizeVector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize());
for (Index i = 0; i < numblocks; ++i) {
device.enqueue_with_barrier(&barrier, &FullReducerShard<Self, Op, Vectorizable>::run,
@@ -492,7 +492,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
}
// Attempt to use an optimized reduction.
- else if (RunningOnGPU && data && (m_device.majorDeviceVersion() >= 3)) {
+ else if (RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) {
bool reducing_inner_dims = true;
for (int i = 0; i < NumReducedDims; ++i) {
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
@@ -505,8 +505,12 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
(reducing_inner_dims || ReducingInnerMostDims)) {
const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
+ if (!data && num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve) {
+ data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
+ m_result = data;
+ }
Op reducer(m_reducer);
- return internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve);
+ return internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve) || (m_result != NULL);
}
bool preserving_inner_dims = true;
@@ -521,8 +525,12 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
preserving_inner_dims) {
const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
+ if (!data && num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve) {
+ data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
+ m_result = data;
+ }
Op reducer(m_reducer);
- return internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve);
+ return internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve) || (m_result != NULL);
}
}
return true;
@@ -537,8 +545,8 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
{
- if (RunningFullReduction && m_result) {
- return *m_result;
+ if ((RunningFullReduction || RunningOnGPU) && m_result) {
+ return *(m_result + index);
}
Op reducer(m_reducer);
if (ReducingInnerMostDims || RunningFullReduction) {
@@ -558,7 +566,11 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index + PacketSize - 1 < internal::array_prod(dimensions()));
+ eigen_assert(index + PacketSize - 1 < Index(internal::array_prod(dimensions())));
+
+ if (RunningOnGPU && m_result) {
+ return internal::pload<PacketReturnType>(m_result + index);
+ }
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
if (ReducingInnerMostDims) {
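With this change the GPU inner/outer reducers can run even when the caller did not hand down an output buffer: a small scratch result (under 1024 coefficients) is allocated on the device, recorded in m_result, and coeff()/packet() then read from it. A sketch of the kind of expression this serves (CPU-evaluable as written; on a GpuDevice the new path would kick in):

    Eigen::Tensor<float, 2> input(64, 512);
    input.setRandom();
    Eigen::array<int, 1> dims = {{1}};            // reduce along one dimension
    Eigen::Tensor<float, 1> row_sums = input.sum(dims);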
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
index a61b14ded..8501466ce 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
@@ -94,7 +94,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
enum {
IsAligned = false,
- PacketAccess = (internal::packet_traits<Scalar>::size > 1),
+ PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
BlockAccess = false,
Layout = TensorEvaluator<ArgType, Device>::Layout,
CoordAccess = false,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
index bdcd70fd9..3523e7c94 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
@@ -20,6 +20,7 @@ struct static_val {
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator uint64_t() const { return n; }
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val() { }
+
template <typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val(const T& v) {
eigen_assert(v == n);
@@ -53,7 +54,7 @@ struct TensorUInt128
template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
explicit TensorUInt128(const T& x) : high(0), low(x) {
- eigen_assert((static_cast<typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type>(x) <= static_cast<typename conditional<sizeof(LOW) == 8, uint64_t, uint32_t>::type>(NumTraits<LOW>::highest())));
+ eigen_assert((static_cast<typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type>(x) <= NumTraits<uint64_t>::highest()));
eigen_assert(x >= 0);
}
diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h
index d2204ad5b..399f95cc1 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h
@@ -21,14 +21,14 @@ struct StlThreadEnvironment {
// destructor must join the thread.
class EnvThread {
public:
- EnvThread(std::function<void()> f) : thr_(f) {}
+ EnvThread(std::function<void()> f) : thr_(std::move(f)) {}
~EnvThread() { thr_.join(); }
private:
std::thread thr_;
};
- EnvThread* CreateThread(std::function<void()> f) { return new EnvThread(f); }
+ EnvThread* CreateThread(std::function<void()> f) { return new EnvThread(std::move(f)); }
Task CreateTask(std::function<void()> f) { return Task{std::move(f)}; }
void ExecuteTask(const Task& t) { t.f(); }
};
diff --git a/unsupported/Eigen/MPRealSupport b/unsupported/Eigen/MPRealSupport
index 89036886b..7f0b70c63 100644
--- a/unsupported/Eigen/MPRealSupport
+++ b/unsupported/Eigen/MPRealSupport
@@ -67,27 +67,32 @@ int main()
IsSigned = 1,
IsComplex = 0,
RequireInitialization = 1,
- ReadCost = 10,
- AddCost = 10,
- MulCost = 40
+ ReadCost = HugeCost,
+ AddCost = HugeCost,
+ MulCost = HugeCost
};
typedef mpfr::mpreal Real;
typedef mpfr::mpreal NonInteger;
- inline static Real highest (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::maxval(Precision); }
- inline static Real lowest (long Precision = mpfr::mpreal::get_default_prec()) { return -mpfr::maxval(Precision); }
+ static inline Real highest (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::maxval(Precision); }
+ static inline Real lowest (long Precision = mpfr::mpreal::get_default_prec()) { return -mpfr::maxval(Precision); }
// Constants
- inline static Real Pi (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::const_pi(Precision); }
- inline static Real Euler (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::const_euler(Precision); }
- inline static Real Log2 (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::const_log2(Precision); }
- inline static Real Catalan (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::const_catalan(Precision); }
+ static inline Real Pi (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::const_pi(Precision); }
+ static inline Real Euler (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::const_euler(Precision); }
+ static inline Real Log2 (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::const_log2(Precision); }
+ static inline Real Catalan (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::const_catalan(Precision); }
- inline static Real epsilon (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::machine_epsilon(Precision); }
- inline static Real epsilon (const Real& x) { return mpfr::machine_epsilon(x); }
+ static inline Real epsilon (long Precision = mpfr::mpreal::get_default_prec()) { return mpfr::machine_epsilon(Precision); }
+ static inline Real epsilon (const Real& x) { return mpfr::machine_epsilon(x); }
- inline static Real dummy_precision()
+#ifdef MPREAL_HAVE_DYNAMIC_STD_NUMERIC_LIMITS
+ static inline int digits10 (long Precision = mpfr::mpreal::get_default_prec()) { return std::numeric_limits<Real>::digits10(Precision); }
+ static inline int digits10 (const Real& x) { return std::numeric_limits<Real>::digits10(x); }
+#endif
+
+ static inline Real dummy_precision()
{
mpfr_prec_t weak_prec = ((mpfr::mpreal::get_default_prec()-1) * 90) / 100;
return mpfr::machine_epsilon(weak_prec);
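For context, the module is used by substituting mpfr::mpreal as the matrix scalar type; bumping ReadCost/AddCost/MulCost to HugeCost tells Eigen's cost model that every mpreal operation is expensive. A minimal sketch along the lines of the example in this header's own documentation (the precision value is illustrative):

    #include <unsupported/Eigen/MPRealSupport>
    #include <Eigen/Dense>
    using mpfr::mpreal;

    int main() {
      mpreal::set_default_prec(256);  // 256-bit mantissa
      typedef Eigen::Matrix<mpreal, Eigen::Dynamic, Eigen::Dynamic> MatrixXmp;
      MatrixXmp A = MatrixXmp::Random(10, 10);
      MatrixXmp B = A * A.transpose();  // all arithmetic in multi-precision
      return 0;
    }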
diff --git a/unsupported/Eigen/SpecialFunctions b/unsupported/Eigen/SpecialFunctions
new file mode 100644
index 000000000..7c7493c56
--- /dev/null
+++ b/unsupported/Eigen/SpecialFunctions
@@ -0,0 +1,61 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Gael Guennebaud <g.gael@free.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SPECIALFUNCTIONS_MODULE
+#define EIGEN_SPECIALFUNCTIONS_MODULE
+
+#include "../../Eigen/Core"
+
+#include "../../Eigen/src/Core/util/DisableStupidWarnings.h"
+
+namespace Eigen {
+
+/**
+ * \defgroup SpecialFunctions_Module Special math functions module
+ *
+ * This module features additional coefficient-wise math functions available
+ * within the numext:: namespace for the scalar version, and as methods and/or free
+ * functions of Array. Those include:
+ *
+ * - erf
+ * - erfc
+ * - lgamma
+ * - igamma
+ * - igammac
+ * - digamma
+ * - polygamma
+ * - zeta
+ * - betainc
+ *
+ * \code
+ * #include <unsupported/Eigen/SpecialFunctions>
+ * \endcode
+ */
+//@{
+
+}
+
+#include "src/SpecialFunctions/SpecialFunctionsImpl.h"
+#include "src/SpecialFunctions/SpecialFunctionsPacketMath.h"
+#include "src/SpecialFunctions/SpecialFunctionsHalf.h"
+#include "src/SpecialFunctions/SpecialFunctionsFunctors.h"
+#include "src/SpecialFunctions/SpecialFunctionsArrayAPI.h"
+
+#if defined EIGEN_VECTORIZE_CUDA
+ #include "src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h"
+#endif
+
+namespace Eigen {
+//@}
+}
+
+
+#include "../../Eigen/src/Core/util/ReenableStupidWarnings.h"
+
+#endif // EIGEN_SPECIALFUNCTIONS_MODULE
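A quick sketch of the new module in use through the Array API (assuming the Array methods pick up the module's implementations; values are illustrative):

    #include <unsupported/Eigen/SpecialFunctions>
    #include <iostream>

    int main() {
      Eigen::ArrayXd x = Eigen::ArrayXd::LinSpaced(4, 0.5, 2.0);
      std::cout << x.lgamma() << "\n";  // coefficient-wise log|Gamma(x)|
      std::cout << x.erf() << "\n";     // coefficient-wise Gauss error function
      return 0;
    }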
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
index 76bf2d96c..50fedf6ac 100755
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
@@ -497,31 +497,15 @@ struct make_coherent_impl<Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows,
} // end namespace internal
-template<typename A_Scalar, int A_Rows, int A_Cols, int A_Options, int A_MaxRows, int A_MaxCols, typename BinOp>
-struct ScalarBinaryOpTraits<Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols>,A_Scalar,BinOp>
-{
- enum { Defined = 1 };
- typedef Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols> ReturnType;
-};
-
-template<typename A_Scalar, int A_Rows, int A_Cols, int A_Options, int A_MaxRows, int A_MaxCols, typename BinOp>
-struct ScalarBinaryOpTraits<A_Scalar, Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols>, BinOp>
-{
- enum { Defined = 1 };
- typedef Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols> ReturnType;
-};
-
template<typename DerType, typename BinOp>
struct ScalarBinaryOpTraits<AutoDiffScalar<DerType>,typename DerType::Scalar,BinOp>
{
- enum { Defined = 1 };
typedef AutoDiffScalar<DerType> ReturnType;
};
template<typename DerType, typename BinOp>
struct ScalarBinaryOpTraits<typename DerType::Scalar,AutoDiffScalar<DerType>, BinOp>
{
- enum { Defined = 1 };
typedef AutoDiffScalar<DerType> ReturnType;
};
@@ -578,6 +562,15 @@ inline AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::Plain
typedef AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> ADS;
return (x > y ? ADS(x) : ADS(y));
}
+template<typename DerType>
+inline AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> (min)(const AutoDiffScalar<DerType>& x, const AutoDiffScalar<DerType>& y) {
+ return (x.value() < y.value() ? x : y);
+}
+template<typename DerType>
+inline AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::PlainObject> (max)(const AutoDiffScalar<DerType>& x, const AutoDiffScalar<DerType>& y) {
+ return (x.value() >= y.value() ? x : y);
+}
+
EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(abs,
using std::abs;
@@ -617,7 +610,8 @@ EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all<DerType>::t
pow(const Eigen::AutoDiffScalar<DerType> &x, const typename internal::traits<typename internal::remove_all<DerType>::type>::Scalar &y)
{
using namespace Eigen;
- return Eigen::MakeAutoDiffScalar(std::pow(x.value(),y), x.derivatives() * (y * std::pow(x.value(),y-1)));
+ using std::pow;
+ return Eigen::MakeAutoDiffScalar(pow(x.value(),y), x.derivatives() * (y * pow(x.value(),y-1)));
}
@@ -672,13 +666,14 @@ EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(cosh,
#undef EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY
template<typename DerType> struct NumTraits<AutoDiffScalar<DerType> >
- : NumTraits< typename NumTraits<typename DerType::Scalar>::Real >
+ : NumTraits< typename NumTraits<typename internal::remove_all<DerType>::type::Scalar>::Real >
{
- typedef AutoDiffScalar<Matrix<typename NumTraits<typename DerType::Scalar>::Real,DerType::RowsAtCompileTime,DerType::ColsAtCompileTime,
- 0, DerType::MaxRowsAtCompileTime, DerType::MaxColsAtCompileTime> > Real;
+ typedef typename internal::remove_all<DerType>::type DerTypeCleaned;
+ typedef AutoDiffScalar<Matrix<typename NumTraits<typename DerTypeCleaned::Scalar>::Real,DerTypeCleaned::RowsAtCompileTime,DerTypeCleaned::ColsAtCompileTime,
+ 0, DerTypeCleaned::MaxRowsAtCompileTime, DerTypeCleaned::MaxColsAtCompileTime> > Real;
typedef AutoDiffScalar<DerType> NonInteger;
typedef AutoDiffScalar<DerType> Nested;
- typedef typename NumTraits<typename DerType::Scalar>::Literal Literal;
+ typedef typename NumTraits<typename DerTypeCleaned::Scalar>::Literal Literal;
enum{
RequireInitialization = 1
};
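The new (min)/(max) overloads compare the scalar values and return the chosen operand whole, so the derivatives of the winning branch propagate through. A hedged sketch (using the AutoDiffScalar constructor taking value, number of derivatives, and the index of this variable):

    #include <unsupported/Eigen/AutoDiff>

    typedef Eigen::AutoDiffScalar<Eigen::VectorXd> ADScalar;

    int main() {
      ADScalar x(1.0, 2, 0);      // value 1.0, seeds d/dx
      ADScalar y(3.0, 2, 1);      // value 3.0, seeds d/dy
      ADScalar m = (min)(x, y);   // picks x: value 1.0, x's derivatives
      return 0;
    }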
diff --git a/unsupported/Eigen/src/CMakeLists.txt b/unsupported/Eigen/src/CMakeLists.txt
index a7e8c7553..f42946793 100644
--- a/unsupported/Eigen/src/CMakeLists.txt
+++ b/unsupported/Eigen/src/CMakeLists.txt
@@ -11,5 +11,6 @@ ADD_SUBDIRECTORY(NumericalDiff)
ADD_SUBDIRECTORY(Polynomials)
ADD_SUBDIRECTORY(Skyline)
ADD_SUBDIRECTORY(SparseExtra)
+ADD_SUBDIRECTORY(SpecialFunctions)
ADD_SUBDIRECTORY(KroneckerProduct)
ADD_SUBDIRECTORY(Splines)
diff --git a/unsupported/Eigen/src/SpecialFunctions/CMakeLists.txt b/unsupported/Eigen/src/SpecialFunctions/CMakeLists.txt
new file mode 100644
index 000000000..25df9439d
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/CMakeLists.txt
@@ -0,0 +1,11 @@
+FILE(GLOB Eigen_SpecialFunctions_SRCS "*.h")
+INSTALL(FILES
+ ${Eigen_SpecialFunctions_SRCS}
+ DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/SpecialFunctions COMPONENT Devel
+ )
+
+FILE(GLOB Eigen_SpecialFunctions_arch_CUDA_SRCS "arch/CUDA/*.h")
+INSTALL(FILES
+ ${Eigen_SpecialFunctions_arch_CUDA_SRCS}
+ DESTINATION ${INCLUDE_INSTALL_DIR}/unsupported/Eigen/src/SpecialFunctions/arch/CUDA COMPONENT Devel
+ )
\ No newline at end of file
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h
new file mode 100644
index 000000000..ed415db99
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsArrayAPI.h
@@ -0,0 +1,124 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#ifndef EIGEN_SPECIALFUNCTIONS_ARRAYAPI_H
+#define EIGEN_SPECIALFUNCTIONS_ARRAYAPI_H
+
+namespace Eigen {
+
+/** \cpp11 \returns an expression of the coefficient-wise igamma(\a a, \a x) applied to the given arrays.
+ *
+ * This function computes the coefficient-wise incomplete gamma function.
+ *
+ * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
+ * or float/double in non c++11 mode, the user has to provide implementations of igammac(T,T) for any scalar
+ * type T to be supported.
+ *
+ * \sa Eigen::igammac(), Eigen::lgamma()
+ */
+template<typename Derived,typename ExponentDerived>
+inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
+igamma(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x)
+{
+ return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igamma_op<typename Derived::Scalar>, const Derived, const ExponentDerived>(
+ a.derived(),
+ x.derived()
+ );
+}
+
+/** \cpp11 \returns an expression of the coefficient-wise igammac(\a a, \a x) applied to the given arrays.
+ *
+ * This function computes the coefficient-wise complementary incomplete gamma function.
+ *
+ * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
+ * or float/double in non c++11 mode, the user has to provide implementations of igammac(T,T) for any scalar
+ * type T to be supported.
+ *
+ * \sa Eigen::igamma(), Eigen::lgamma()
+ */
+template<typename Derived,typename ExponentDerived>
+inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>
+igammac(const Eigen::ArrayBase<Derived>& a, const Eigen::ArrayBase<ExponentDerived>& x)
+{
+ return Eigen::CwiseBinaryOp<Eigen::internal::scalar_igammac_op<typename Derived::Scalar>, const Derived, const ExponentDerived>(
+ a.derived(),
+ x.derived()
+ );
+}
+
+/** \cpp11 \returns an expression of the coefficient-wise polygamma(\a n, \a x) applied to the given arrays.
+ *
+ * It returns the \a n -th derivative of the digamma (psi) function evaluated at \c x.
+ *
+ * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
+ * or float/double in non c++11 mode, the user has to provide implementations of polygamma(T,T) for any scalar
+ * type T to be supported.
+ *
+ * \sa Eigen::digamma()
+ */
+// * \warning Be careful with the order of the parameters: x.polygamma(n) is equivalent to polygamma(n,x)
+// * \sa ArrayBase::polygamma()
+template<typename DerivedN,typename DerivedX>
+inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_polygamma_op<typename DerivedX::Scalar>, const DerivedN, const DerivedX>
+polygamma(const Eigen::ArrayBase<DerivedN>& n, const Eigen::ArrayBase<DerivedX>& x)
+{
+ return Eigen::CwiseBinaryOp<Eigen::internal::scalar_polygamma_op<typename DerivedX::Scalar>, const DerivedN, const DerivedX>(
+ n.derived(),
+ x.derived()
+ );
+}
+
+/** \cpp11 \returns an expression of the coefficient-wise betainc(\a x, \a a, \a b) applied to the given arrays.
+ *
+ * This function computes the regularized incomplete beta function (integral).
+ *
+ * \note This function supports only float and double scalar types in c++11 mode. To support other scalar types,
+ * or float/double in non c++11 mode, the user has to provide implementations of betainc(T,T,T) for any scalar
+ * type T to be supported.
+ *
+ * \sa Eigen::igamma(), Eigen::lgamma()
+ */
+template<typename ArgADerived, typename ArgBDerived, typename ArgXDerived>
+inline const Eigen::CwiseTernaryOp<Eigen::internal::scalar_betainc_op<typename ArgXDerived::Scalar>, const ArgADerived, const ArgBDerived, const ArgXDerived>
+betainc(const Eigen::ArrayBase<ArgADerived>& a, const Eigen::ArrayBase<ArgBDerived>& b, const Eigen::ArrayBase<ArgXDerived>& x)
+{
+ return Eigen::CwiseTernaryOp<Eigen::internal::scalar_betainc_op<typename ArgXDerived::Scalar>, const ArgADerived, const ArgBDerived, const ArgXDerived>(
+ a.derived(),
+ b.derived(),
+ x.derived()
+ );
+}
+
+
+/** \returns an expression of the coefficient-wise zeta(\a x, \a q) applied to the given arrays.
+ *
+ * It returns the Riemann zeta function of two arguments \a x and \a q:
+ *
+ * \param x is the exponent; it must be > 1
+ * \param q is the shift; it must be > 0
+ *
+ * \note This function supports only float and double scalar types. To support other scalar types, the user has
+ * to provide implementations of zeta(T,T) for any scalar type T to be supported.
+ *
+ * \sa ArrayBase::zeta()
+ */
+template<typename DerivedX,typename DerivedQ>
+inline const Eigen::CwiseBinaryOp<Eigen::internal::scalar_zeta_op<typename DerivedX::Scalar>, const DerivedX, const DerivedQ>
+zeta(const Eigen::ArrayBase<DerivedX>& x, const Eigen::ArrayBase<DerivedQ>& q)
+{
+ return Eigen::CwiseBinaryOp<Eigen::internal::scalar_zeta_op<typename DerivedX::Scalar>, const DerivedX, const DerivedQ>(
+ x.derived(),
+ q.derived()
+ );
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_SPECIALFUNCTIONS_ARRAYAPI_H
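A sketch of the free-function API defined above; the complementary pair should sum to one, which makes a handy sanity check:

    #include <unsupported/Eigen/SpecialFunctions>

    int main() {
      Eigen::ArrayXd a = Eigen::ArrayXd::Constant(3, 2.0);
      Eigen::ArrayXd x = Eigen::ArrayXd::LinSpaced(3, 0.5, 2.5);
      Eigen::ArrayXd p = Eigen::igamma(a, x);   // regularized lower incomplete gamma
      Eigen::ArrayXd q = Eigen::igammac(a, x);  // its complement: p + q == 1
      return 0;
    }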
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
new file mode 100644
index 000000000..d8f2363be
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsFunctors.h
@@ -0,0 +1,236 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Eugene Brevdo <ebrevdo@gmail.com>
+// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SPECIALFUNCTIONS_FUNCTORS_H
+#define EIGEN_SPECIALFUNCTIONS_FUNCTORS_H
+
+namespace Eigen {
+
+namespace internal {
+
+
+/** \internal
+ * \brief Template functor to compute the incomplete gamma function igamma(a, x)
+ *
+ * \sa class CwiseBinaryOp, Cwise::igamma
+ */
+template<typename Scalar> struct scalar_igamma_op : binary_op_base<Scalar,Scalar>
+{
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_igamma_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const {
+ using numext::igamma; return igamma(a, x);
+ }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const {
+ return internal::pigamma(a, x);
+ }
+};
+template<typename Scalar>
+struct functor_traits<scalar_igamma_op<Scalar> > {
+ enum {
+ // Guesstimate
+ Cost = 20 * NumTraits<Scalar>::MulCost + 10 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasIGamma
+ };
+};
+
+
+/** \internal
+ * \brief Template functor to compute the complementary incomplete gamma function igammac(a, x)
+ *
+ * \sa class CwiseBinaryOp, Cwise::igammac
+ */
+template<typename Scalar> struct scalar_igammac_op : binary_op_base<Scalar,Scalar>
+{
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_igammac_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& a, const Scalar& x) const {
+ using numext::igammac; return igammac(a, x);
+ }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& a, const Packet& x) const
+ {
+ return internal::pigammac(a, x);
+ }
+};
+template<typename Scalar>
+struct functor_traits<scalar_igammac_op<Scalar> > {
+ enum {
+ // Guesstimate
+ Cost = 20 * NumTraits<Scalar>::MulCost + 10 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasIGammac
+ };
+};
+
+
+/** \internal
+ * \brief Template functor to compute the incomplete beta integral betainc(a, b, x)
+ *
+ */
+template<typename Scalar> struct scalar_betainc_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_betainc_op)
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& x, const Scalar& a, const Scalar& b) const {
+ using numext::betainc; return betainc(x, a, b);
+ }
+ template<typename Packet>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet packetOp(const Packet& x, const Packet& a, const Packet& b) const
+ {
+ return internal::pbetainc(x, a, b);
+ }
+};
+template<typename Scalar>
+struct functor_traits<scalar_betainc_op<Scalar> > {
+ enum {
+ // Guesstimate
+ Cost = 400 * NumTraits<Scalar>::MulCost + 400 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasBetaInc
+ };
+};
+
+
+/** \internal
+ * \brief Template functor to compute the natural log of the absolute
+ * value of Gamma of a scalar
+ * \sa class CwiseUnaryOp, Cwise::lgamma()
+ */
+template<typename Scalar> struct scalar_lgamma_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_lgamma_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+ using numext::lgamma; return lgamma(a);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plgamma(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_lgamma_op<Scalar> >
+{
+ enum {
+ // Guesstimate
+ Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasLGamma
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute psi, the derivative of lgamma of a scalar.
+ * \sa class CwiseUnaryOp, Cwise::digamma()
+ */
+template<typename Scalar> struct scalar_digamma_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_digamma_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+ using numext::digamma; return digamma(a);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::pdigamma(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_digamma_op<Scalar> >
+{
+ enum {
+ // Guesstimate
+ Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasDiGamma
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the Riemann Zeta function of two arguments.
+ * \sa class CwiseUnaryOp, Cwise::zeta()
+ */
+template<typename Scalar> struct scalar_zeta_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_zeta_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& x, const Scalar& q) const {
+ using numext::zeta; return zeta(x, q);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& x, const Packet& q) const { return internal::pzeta(x, q); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_zeta_op<Scalar> >
+{
+ enum {
+ // Guesstimate
+ Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasZeta
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the polygamma function.
+ * \sa class CwiseUnaryOp, Cwise::polygamma()
+ */
+template<typename Scalar> struct scalar_polygamma_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_polygamma_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& n, const Scalar& x) const {
+ using numext::polygamma; return polygamma(n, x);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& n, const Packet& x) const { return internal::ppolygamma(n, x); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_polygamma_op<Scalar> >
+{
+ enum {
+ // Guesstimate
+ Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasPolygamma
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the Gauss error function of a
+ * scalar
+ * \sa class CwiseUnaryOp, Cwise::erf()
+ */
+template<typename Scalar> struct scalar_erf_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_erf_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+ using numext::erf; return erf(a);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perf(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_erf_op<Scalar> >
+{
+ enum {
+ // Guesstimate
+ Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasErf
+ };
+};
+
+/** \internal
+ * \brief Template functor to compute the Complementary Error Function
+ * of a scalar
+ * \sa class CwiseUnaryOp, Cwise::erfc()
+ */
+template<typename Scalar> struct scalar_erfc_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_erfc_op)
+ EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const {
+ using numext::erfc; return erfc(a);
+ }
+ typedef typename packet_traits<Scalar>::type Packet;
+ EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::perfc(a); }
+};
+template<typename Scalar>
+struct functor_traits<scalar_erfc_op<Scalar> >
+{
+ enum {
+ // Guesstimate
+ Cost = 10 * NumTraits<Scalar>::MulCost + 5 * NumTraits<Scalar>::AddCost,
+ PacketAccess = packet_traits<Scalar>::HasErfc
+ };
+};
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SPECIALFUNCTIONS_FUNCTORS_H
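These functors are the glue between the scalar numext:: implementations and Eigen's expression templates; the Array API in SpecialFunctionsArrayAPI.h simply instantiates CwiseBinaryOp/CwiseTernaryOp with them. The same can be done directly, as a sketch:

    #include <unsupported/Eigen/SpecialFunctions>

    int main() {
      Eigen::ArrayXf v = Eigen::ArrayXf::LinSpaced(5, -2.f, 2.f);
      // Equivalent to v.erf(): wraps each coefficient in scalar_erf_op.
      Eigen::ArrayXf e = v.unaryExpr(Eigen::internal::scalar_erf_op<float>());
      return 0;
    }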
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h
new file mode 100644
index 000000000..553bcda6a
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsHalf.h
@@ -0,0 +1,47 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SPECIALFUNCTIONS_HALF_H
+#define EIGEN_SPECIALFUNCTIONS_HALF_H
+
+namespace Eigen {
+namespace numext {
+
+#if EIGEN_HAS_C99_MATH
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half lgamma(const Eigen::half& a) {
+ return Eigen::half(Eigen::numext::lgamma(static_cast<float>(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half digamma(const Eigen::half& a) {
+ return Eigen::half(Eigen::numext::digamma(static_cast<float>(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half zeta(const Eigen::half& x, const Eigen::half& q) {
+ return Eigen::half(Eigen::numext::zeta(static_cast<float>(x), static_cast<float>(q)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half polygamma(const Eigen::half& n, const Eigen::half& x) {
+ return Eigen::half(Eigen::numext::polygamma(static_cast<float>(n), static_cast<float>(x)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erf(const Eigen::half& a) {
+ return Eigen::half(Eigen::numext::erf(static_cast<float>(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half erfc(const Eigen::half& a) {
+ return Eigen::half(Eigen::numext::erfc(static_cast<float>(a)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igamma(const Eigen::half& a, const Eigen::half& x) {
+ return Eigen::half(Eigen::numext::igamma(static_cast<float>(a), static_cast<float>(x)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half igammac(const Eigen::half& a, const Eigen::half& x) {
+ return Eigen::half(Eigen::numext::igammac(static_cast<float>(a), static_cast<float>(x)));
+}
+template<> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half betainc(const Eigen::half& a, const Eigen::half& b, const Eigen::half& x) {
+ return Eigen::half(Eigen::numext::betainc(static_cast<float>(a), static_cast<float>(b), static_cast<float>(x)));
+}
+#endif
+
+} // end namespace numext
+} // end namespace Eigen
+
+#endif // EIGEN_SPECIALFUNCTIONS_HALF_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
new file mode 100644
index 000000000..52619fc0c
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -0,0 +1,1551 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2015 Eugene Brevdo <ebrevdo@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SPECIAL_FUNCTIONS_H
+#define EIGEN_SPECIAL_FUNCTIONS_H
+
+namespace Eigen {
+namespace internal {
+
+// Parts of this code are based on the Cephes Math Library.
+//
+// Cephes Math Library Release 2.8: June, 2000
+// Copyright 1984, 1987, 1992, 2000 by Stephen L. Moshier
+//
+// Permission has been kindly provided by the original author
+// to incorporate the Cephes software into the Eigen codebase:
+//
+// From: Stephen Moshier
+// To: Eugene Brevdo
+// Subject: Re: Permission to wrap several cephes functions in Eigen
+//
+// Hello Eugene,
+//
+// Thank you for writing.
+//
+// If your licensing is similar to BSD, the formal way that has been
+// handled is simply to add a statement to the effect that you are incorporating
+// the Cephes software by permission of the author.
+//
+// Good luck with your project,
+// Steve
+
+namespace cephes {
+
+/* polevl (modified for Eigen)
+ *
+ * Evaluate polynomial
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * int N;
+ * Scalar x, y, coef[N+1];
+ *
+ * y = polevl<decltype(x), N>( x, coef);
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Evaluates polynomial of degree N:
+ *
+ * 2 N
+ * y = C + C x + C x +...+ C x
+ * 0 1 2 N
+ *
+ * Coefficients are stored in reverse order:
+ *
+ * coef[0] = C , ..., coef[N] = C .
+ * N 0
+ *
+ * The function p1evl() assumes that coef[N] = 1.0 and is
+ * omitted from the array. Its calling arguments are
+ * otherwise the same as polevl().
+ *
+ *
+ * The Eigen implementation is templatized. For best speed, store
+ * coef as a const array (constexpr), e.g.
+ *
+ * const double coef[] = {1.0, 2.0, 3.0, ...};
+ *
+ */
+template <typename Scalar, int N>
+struct polevl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar x, const Scalar coef[]) {
+ EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ return polevl<Scalar, N - 1>::run(x, coef) * x + coef[N];
+ }
+};
+
+template <typename Scalar>
+struct polevl<Scalar, 0> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar, const Scalar coef[]) {
+ return coef[0];
+ }
+};
+
+} // end namespace cephes
+
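As a concrete check of the convention above (coef[0] holds the highest-degree coefficient), a sketch evaluating 3x^2 + 2x + 1 at x = 2:

    const double coef[] = {3.0, 2.0, 1.0};
    // Horner recursion: (3*2 + 2)*2 + 1 = 17
    double y = Eigen::internal::cephes::polevl<double, 2>::run(2.0, coef);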
+/****************************************************************************
+ * Implementation of lgamma, requires C++11/C99 *
+ ****************************************************************************/
+
+template <typename Scalar>
+struct lgamma_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+template <typename Scalar>
+struct lgamma_retval {
+ typedef Scalar type;
+};
+
+#if EIGEN_HAS_C99_MATH
+template <>
+struct lgamma_impl<float> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float run(float x) { return ::lgammaf(x); }
+};
+
+template <>
+struct lgamma_impl<double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double run(double x) { return ::lgamma(x); }
+};
+#endif
+
+/****************************************************************************
+ * Implementation of digamma (psi), based on Cephes *
+ ****************************************************************************/
+
+template <typename Scalar>
+struct digamma_retval {
+ typedef Scalar type;
+};
+
+/*
+ *
+ * Polynomial evaluation helper for the Psi (digamma) function.
+ *
+ * digamma_impl_maybe_poly::run(s) evaluates the asymptotic Psi expansion for
+ * input Scalar s, assuming s is above 10.0.
+ *
+ * If s is above a certain threshold for the given Scalar type, zero
+ * is returned. Otherwise the polynomial is evaluated with enough
+ * coefficients for results matching Scalar machine precision.
+ *
+ *
+ */
+template <typename Scalar>
+struct digamma_impl_maybe_poly {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+
+template <>
+struct digamma_impl_maybe_poly<float> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float run(const float s) {
+ const float A[] = {
+ -4.16666666666666666667E-3f,
+ 3.96825396825396825397E-3f,
+ -8.33333333333333333333E-3f,
+ 8.33333333333333333333E-2f
+ };
+
+ float z;
+ if (s < 1.0e8f) {
+ z = 1.0f / (s * s);
+ return z * cephes::polevl<float, 3>::run(z, A);
+ } else return 0.0f;
+ }
+};
+
+template <>
+struct digamma_impl_maybe_poly<double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double run(const double s) {
+ const double A[] = {
+ 8.33333333333333333333E-2,
+ -2.10927960927960927961E-2,
+ 7.57575757575757575758E-3,
+ -4.16666666666666666667E-3,
+ 3.96825396825396825397E-3,
+ -8.33333333333333333333E-3,
+ 8.33333333333333333333E-2
+ };
+
+ double z;
+ if (s < 1.0e17) {
+ z = 1.0 / (s * s);
+ return z * cephes::polevl<double, 6>::run(z, A);
+ }
+ else return 0.0;
+ }
+};
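+
+// In both specializations above, A[] holds the Bernoulli-number coefficients
+// B_{2k} / (2k) of the asymptotic expansion, in polevl (highest-degree-first)
+// order, with just enough terms for the respective machine precision.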
+
+template <typename Scalar>
+struct digamma_impl {
+ EIGEN_DEVICE_FUNC
+ static Scalar run(Scalar x) {
+ /*
+ *
+ * Psi (digamma) function (modified for Eigen)
+ *
+ *
+ * SYNOPSIS:
+ *
+ * double x, y, psi();
+ *
+ * y = psi( x );
+ *
+ *
+ * DESCRIPTION:
+ *
+ * d -
+ * psi(x) = -- ln | (x)
+ * dx
+ *
+ * is the logarithmic derivative of the gamma function.
+ * For integer x,
+ * n-1
+ * -
+ * psi(n) = -EUL + > 1/k.
+ * -
+ * k=1
+ *
+ * If x is negative, it is transformed to a positive argument by the
+ * reflection formula psi(1-x) = psi(x) + pi cot(pi x).
+ * For general positive x, the argument is made greater than 10
+ * using the recurrence psi(x+1) = psi(x) + 1/x.
+ * Then the following asymptotic expansion is applied:
+ *
+ * inf. B
+ * - 2k
+ * psi(x) = log(x) - 1/2x - > -------
+ * - 2k
+ * k=1 2k x
+ *
+ * where the B2k are Bernoulli numbers.
+ *
+     * ACCURACY (double):
+     *    Relative error (except absolute when |psi| < 1):
+     * arithmetic   domain     # trials      peak         rms
+     *    IEEE      0,30        30000       1.3e-15     1.4e-16
+     *    IEEE      -30,0       40000       1.5e-15     2.2e-16
+     *
+     * ACCURACY (float):
+     *    Absolute error, relative when |psi| > 1:
+     * arithmetic   domain     # trials      peak         rms
+     *    IEEE      -33,0       30000      8.2e-7      1.2e-7
+     *    IEEE      0,33       100000      7.3e-7      7.7e-8
+ *
+ * ERROR MESSAGES:
+ * message condition value returned
+ * psi singularity x integer <=0 INFINITY
+ */
+
+ Scalar p, q, nz, s, w, y;
+ bool negative = false;
+
+ const Scalar maxnum = NumTraits<Scalar>::infinity();
+ const Scalar m_pi = Scalar(EIGEN_PI);
+
+ const Scalar zero = Scalar(0);
+ const Scalar one = Scalar(1);
+ const Scalar half = Scalar(0.5);
+ nz = zero;
+
+ if (x <= zero) {
+ negative = true;
+ q = x;
+ p = numext::floor(q);
+ if (p == q) {
+ return maxnum;
+ }
+ /* Remove the zeros of tan(m_pi x)
+ * by subtracting the nearest integer from x
+ */
+ nz = q - p;
+ if (nz != half) {
+ if (nz > half) {
+ p += one;
+ nz = q - p;
+ }
+ nz = m_pi / numext::tan(m_pi * nz);
+ }
+ else {
+ nz = zero;
+ }
+ x = one - x;
+ }
+
+ /* use the recurrence psi(x+1) = psi(x) + 1/x. */
+ s = x;
+ w = zero;
+ while (s < Scalar(10)) {
+ w += one / s;
+ s += one;
+ }
+
+ y = digamma_impl_maybe_poly<Scalar>::run(s);
+
+ y = numext::log(s) - (half / s) - y - w;
+
+ return (negative) ? y - nz : y;
+ }
+};
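+
+// Spot checks for the implementation above:
+//   digamma(1)   == -0.57721566490153286...  (minus the Euler-Mascheroni constant)
+//   digamma(0.5) == -1.9635100260214235...   (== digamma(1) - 2 log 2)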
+
+/****************************************************************************
+ * Implementation of erf, requires C++11/C99 *
+ ****************************************************************************/
+
+template <typename Scalar>
+struct erf_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+template <typename Scalar>
+struct erf_retval {
+ typedef Scalar type;
+};
+
+#if EIGEN_HAS_C99_MATH
+template <>
+struct erf_impl<float> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float run(float x) { return ::erff(x); }
+};
+
+template <>
+struct erf_impl<double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double run(double x) { return ::erf(x); }
+};
+#endif // EIGEN_HAS_C99_MATH
+
+/***************************************************************************
+* Implementation of erfc, requires C++11/C99 *
+****************************************************************************/
+
+template <typename Scalar>
+struct erfc_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+template <typename Scalar>
+struct erfc_retval {
+ typedef Scalar type;
+};
+
+#if EIGEN_HAS_C99_MATH
+template <>
+struct erfc_impl<float> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); }
+};
+
+template <>
+struct erfc_impl<double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); }
+};
+#endif // EIGEN_HAS_C99_MATH
+
+/**************************************************************************************************************
+ * Implementation of igammac (complemented incomplete gamma integral), based on Cephes but requires C++11/C99 *
+ **************************************************************************************************************/
+
+template <typename Scalar>
+struct igammac_retval {
+ typedef Scalar type;
+};
+
+// NOTE: cephes_helper is also used to implement zeta
+template <typename Scalar>
+struct cephes_helper {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar machep() { assert(false && "machep not supported for this type"); return 0.0; }
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar big() { assert(false && "big not supported for this type"); return 0.0; }
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar biginv() { assert(false && "biginv not supported for this type"); return 0.0; }
+};
+
+template <>
+struct cephes_helper<float> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float machep() {
+    return NumTraits<float>::epsilon() / 2; // 1.0 + machep == 1.0 under round-to-nearest-even
+ }
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float big() {
+ // use epsneg (1.0 - epsneg == 1.0)
+ return 1.0f / (NumTraits<float>::epsilon() / 2);
+ }
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float biginv() {
+    // biginv == 1 / big == epsilon / 2 (i.e. machep)
+ return machep();
+ }
+};
+
+template <>
+struct cephes_helper<double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double machep() {
+    return NumTraits<double>::epsilon() / 2; // 1.0 + machep == 1.0 under round-to-nearest-even
+ }
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double big() {
+ return 1.0 / NumTraits<double>::epsilon();
+ }
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double biginv() {
+    // biginv == 1 / big == epsilon
+ return NumTraits<double>::epsilon();
+ }
+};
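+
+// For reference, these constants evaluate to roughly:
+//   float:  machep ~ 5.96e-08 (2^-24), big ~ 1.68e+07 (2^24), biginv ~ 5.96e-08
+//   double: machep ~ 1.11e-16 (2^-53), big ~ 4.50e+15 (2^52), biginv ~ 2.22e-16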
+
+#if !EIGEN_HAS_C99_MATH
+
+template <typename Scalar>
+struct igammac_impl {
+ EIGEN_DEVICE_FUNC
+ static Scalar run(Scalar a, Scalar x) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+#else
+
+template <typename Scalar> struct igamma_impl; // predeclare igamma_impl
+
+template <typename Scalar>
+struct igammac_impl {
+ EIGEN_DEVICE_FUNC
+ static Scalar run(Scalar a, Scalar x) {
+ /* igamc()
+ *
+ * Incomplete gamma integral (modified for Eigen)
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * double a, x, y, igamc();
+ *
+ * y = igamc( a, x );
+ *
+ * DESCRIPTION:
+ *
+ * The function is defined by
+ *
+ *
+ * igamc(a,x) = 1 - igam(a,x)
+ *
+ * inf.
+ * -
+ * 1 | | -t a-1
+ * = ----- | e t dt.
+ * - | |
+ * | (a) -
+ * x
+ *
+ *
+ * In this implementation both arguments must be positive.
+ * The integral is evaluated by either a power series or
+ * continued fraction expansion, depending on the relative
+ * values of a and x.
+ *
+ * ACCURACY (float):
+ *
+ * Relative error:
+ * arithmetic domain # trials peak rms
+ * IEEE 0,30 30000 7.8e-6 5.9e-7
+ *
+ *
+ * ACCURACY (double):
+ *
+ * Tested at random a, x.
+ * a x Relative error:
+ * arithmetic domain domain # trials peak rms
+ * IEEE 0.5,100 0,100 200000 1.9e-14 1.7e-15
+ * IEEE 0.01,0.5 0,100 200000 1.4e-13 1.6e-15
+ *
+ */
+ /*
+ Cephes Math Library Release 2.2: June, 1992
+ Copyright 1985, 1987, 1992 by Stephen L. Moshier
+ Direct inquiries to 30 Frost Street, Cambridge, MA 02140
+ */
+ const Scalar zero = 0;
+ const Scalar one = 1;
+ const Scalar nan = NumTraits<Scalar>::quiet_NaN();
+
+ if ((x < zero) || (a <= zero)) {
+ // domain error
+ return nan;
+ }
+
+ if ((x < one) || (x < a)) {
+      /* The checks above ensure that we meet the preconditions for
+       * igamma_impl::Impl(), so call it directly rather than igamma_impl::run().
+       * Calling run() would also work, but then the compiler may not be
+       * able to prove that igammac_impl::run and igamma_impl::run are not
+       * mutually recursive. This leads to worse code, particularly on
+       * platforms like nvptx, where recursion is allowed only begrudgingly.
+       */
+ return (one - igamma_impl<Scalar>::Impl(a, x));
+ }
+
+ return Impl(a, x);
+ }
+
+ private:
+ /* igamma_impl calls igammac_impl::Impl. */
+ friend struct igamma_impl<Scalar>;
+
+ /* Actually computes igamc(a, x).
+ *
+ * Preconditions:
+ * a > 0
+ * x >= 1
+ * x >= a
+ */
+ EIGEN_DEVICE_FUNC static Scalar Impl(Scalar a, Scalar x) {
+ const Scalar zero = 0;
+ const Scalar one = 1;
+ const Scalar two = 2;
+ const Scalar machep = cephes_helper<Scalar>::machep();
+ const Scalar maxlog = numext::log(NumTraits<Scalar>::highest());
+ const Scalar big = cephes_helper<Scalar>::big();
+ const Scalar biginv = cephes_helper<Scalar>::biginv();
+ const Scalar inf = NumTraits<Scalar>::infinity();
+
+ Scalar ans, ax, c, yc, r, t, y, z;
+ Scalar pk, pkm1, pkm2, qk, qkm1, qkm2;
+
+ if (x == inf) return zero; // std::isinf crashes on CUDA
+
+ /* Compute x**a * exp(-x) / gamma(a) */
+ ax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a);
+ if (ax < -maxlog) { // underflow
+ return zero;
+ }
+ ax = numext::exp(ax);
+
+ // continued fraction
+ y = one - a;
+ z = x + y + one;
+ c = zero;
+ pkm2 = one;
+ qkm2 = x;
+ pkm1 = x + one;
+ qkm1 = z * x;
+ ans = pkm1 / qkm1;
+
+ while (true) {
+ c += one;
+ y += one;
+ z += two;
+ yc = y * c;
+ pk = pkm1 * z - pkm2 * yc;
+ qk = qkm1 * z - qkm2 * yc;
+ if (qk != zero) {
+ r = pk / qk;
+ t = numext::abs((ans - r) / r);
+ ans = r;
+ } else {
+ t = one;
+ }
+ pkm2 = pkm1;
+ pkm1 = pk;
+ qkm2 = qkm1;
+ qkm1 = qk;
+ if (numext::abs(pk) > big) {
+ pkm2 *= biginv;
+ pkm1 *= biginv;
+ qkm2 *= biginv;
+ qkm1 *= biginv;
+ }
+ if (t <= machep) {
+ break;
+ }
+ }
+
+ return (ans * ax);
+ }
+};
+
+#endif // EIGEN_HAS_C99_MATH
+
+/************************************************************************************************
+ * Implementation of igamma (incomplete gamma integral), based on Cephes but requires C++11/C99 *
+ ************************************************************************************************/
+
+template <typename Scalar>
+struct igamma_retval {
+ typedef Scalar type;
+};
+
+#if !EIGEN_HAS_C99_MATH
+
+template <typename Scalar>
+struct igamma_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar x) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+#else
+
+template <typename Scalar>
+struct igamma_impl {
+ EIGEN_DEVICE_FUNC
+ static Scalar run(Scalar a, Scalar x) {
+ /* igam()
+ * Incomplete gamma integral
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * double a, x, y, igam();
+ *
+ * y = igam( a, x );
+ *
+ * DESCRIPTION:
+ *
+ * The function is defined by
+ *
+ * x
+ * -
+ * 1 | | -t a-1
+ * igam(a,x) = ----- | e t dt.
+ * - | |
+ * | (a) -
+ * 0
+ *
+ *
+ * In this implementation both arguments must be positive.
+ * The integral is evaluated by either a power series or
+ * continued fraction expansion, depending on the relative
+ * values of a and x.
+ *
+ * ACCURACY (double):
+ *
+ * Relative error:
+ * arithmetic domain # trials peak rms
+ * IEEE 0,30 200000 3.6e-14 2.9e-15
+ * IEEE 0,100 300000 9.9e-14 1.5e-14
+ *
+ *
+ * ACCURACY (float):
+ *
+ * Relative error:
+ * arithmetic domain # trials peak rms
+ * IEEE 0,30 20000 7.8e-6 5.9e-7
+ *
+ */
+ /*
+ Cephes Math Library Release 2.2: June, 1992
+ Copyright 1985, 1987, 1992 by Stephen L. Moshier
+ Direct inquiries to 30 Frost Street, Cambridge, MA 02140
+ */
+
+
+ /* left tail of incomplete gamma function:
+ *
+ * inf. k
+ * a -x - x
+ * x e > ----------
+ * - -
+ * k=0 | (a+k+1)
+ *
+ */
+ const Scalar zero = 0;
+ const Scalar one = 1;
+ const Scalar nan = NumTraits<Scalar>::quiet_NaN();
+
+ if (x == zero) return zero;
+
+ if ((x < zero) || (a <= zero)) { // domain error
+ return nan;
+ }
+
+ if ((x > one) && (x > a)) {
+      /* The checks above ensure that we meet the preconditions for
+       * igammac_impl::Impl(), so call it directly rather than igammac_impl::run().
+       * Calling run() would also work, but then the compiler may not be
+       * able to prove that igammac_impl::run and igamma_impl::run are not
+       * mutually recursive. This leads to worse code, particularly on
+       * platforms like nvptx, where recursion is allowed only begrudgingly.
+       */
+ return (one - igammac_impl<Scalar>::Impl(a, x));
+ }
+
+ return Impl(a, x);
+ }
+
+ private:
+ /* igammac_impl calls igamma_impl::Impl. */
+ friend struct igammac_impl<Scalar>;
+
+ /* Actually computes igam(a, x).
+ *
+ * Preconditions:
+ * x > 0
+ * a > 0
+ * !(x > 1 && x > a)
+ */
+ EIGEN_DEVICE_FUNC static Scalar Impl(Scalar a, Scalar x) {
+ const Scalar zero = 0;
+ const Scalar one = 1;
+ const Scalar machep = cephes_helper<Scalar>::machep();
+ const Scalar maxlog = numext::log(NumTraits<Scalar>::highest());
+
+ Scalar ans, ax, c, r;
+
+ /* Compute x**a * exp(-x) / gamma(a) */
+ ax = a * numext::log(x) - x - lgamma_impl<Scalar>::run(a);
+ if (ax < -maxlog) {
+ // underflow
+ return zero;
+ }
+ ax = numext::exp(ax);
+
+ /* power series */
+ r = a;
+ c = one;
+ ans = one;
+
+ while (true) {
+ r += one;
+ c *= x/r;
+ ans += c;
+ if (c/ans <= machep) {
+ break;
+ }
+ }
+
+ return (ans * ax / a);
+ }
+};
+
+#endif // EIGEN_HAS_C99_MATH
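+
+// Both incomplete gamma implementations above are regularized (normalized by
+// gamma(a)), so igamma(a, x) + igammac(a, x) == 1 on the shared domain.
+// A quick closed-form check: igamma(1, x) == 1 - exp(-x), e.g.
+//   igamma(1, 1) == 0.63212055882855767...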
+
+/*****************************************************************************
+ * Implementation of Riemann zeta function of two arguments, based on Cephes *
+ *****************************************************************************/
+
+template <typename Scalar>
+struct zeta_retval {
+ typedef Scalar type;
+};
+
+template <typename Scalar>
+struct zeta_impl_series {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(const Scalar) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+template <>
+struct zeta_impl_series<float> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE bool run(float& a, float& b, float& s, const float x, const float machep) {
+ int i = 0;
+ while(i < 9)
+ {
+ i += 1;
+ a += 1.0f;
+ b = numext::pow( a, -x );
+ s += b;
+ if( numext::abs(b/s) < machep )
+ return true;
+ }
+
+    // Series did not converge; the caller falls back to the Euler-Maclaurin expansion.
+ return false;
+ }
+};
+
+template <>
+struct zeta_impl_series<double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE bool run(double& a, double& b, double& s, const double x, const double machep) {
+ int i = 0;
+ while( (i < 9) || (a <= 9.0) )
+ {
+ i += 1;
+ a += 1.0;
+ b = numext::pow( a, -x );
+ s += b;
+ if( numext::abs(b/s) < machep )
+ return true;
+ }
+
+    // Series did not converge; the caller falls back to the Euler-Maclaurin expansion.
+ return false;
+ }
+};
+
+template <typename Scalar>
+struct zeta_impl {
+ EIGEN_DEVICE_FUNC
+ static Scalar run(Scalar x, Scalar q) {
+ /* zeta.c
+ *
+ * Riemann zeta function of two arguments
+ *
+ *
+ *
+ * SYNOPSIS:
+ *
+ * double x, q, y, zeta();
+ *
+ * y = zeta( x, q );
+ *
+ *
+ *
+ * DESCRIPTION:
+ *
+ *
+ *
+ * inf.
+ * - -x
+ * zeta(x,q) = > (k+q)
+ * -
+ * k=0
+ *
+ * where x > 1 and q is not a negative integer or zero.
+ * The Euler-Maclaurin summation formula is used to obtain
+ * the expansion
+ *
+ * n
+ * - -x
+ * zeta(x,q) = > (k+q)
+ * -
+ * k=1
+ *
+ * 1-x inf. B x(x+1)...(x+2j)
+ * (n+q) 1 - 2j
+ * + --------- - ------- + > --------------------
+ * x-1 x - x+2j+1
+ * 2(n+q) j=1 (2j)! (n+q)
+ *
+ * where the B2j are Bernoulli numbers. Note that (see zetac.c)
+ * zeta(x,1) = zetac(x) + 1.
+ *
+ *
+ *
+ * ACCURACY:
+ *
+ * Relative error for single precision:
+ * arithmetic domain # trials peak rms
+ * IEEE 0,25 10000 6.9e-7 1.0e-7
+ *
+ * Large arguments may produce underflow in powf(), in which
+ * case the results are inaccurate.
+ *
+ * REFERENCE:
+ *
+ * Gradshteyn, I. S., and I. M. Ryzhik, Tables of Integrals,
+ * Series, and Products, p. 1073; Academic Press, 1980.
+ *
+ */
+
+ int i;
+ Scalar p, r, a, b, k, s, t, w;
+
+ const Scalar A[] = {
+ Scalar(12.0),
+ Scalar(-720.0),
+ Scalar(30240.0),
+ Scalar(-1209600.0),
+ Scalar(47900160.0),
+ Scalar(-1.8924375803183791606e9), /*1.307674368e12/691*/
+ Scalar(7.47242496e10),
+ Scalar(-2.950130727918164224e12), /*1.067062284288e16/3617*/
+ Scalar(1.1646782814350067249e14), /*5.109094217170944e18/43867*/
+ Scalar(-4.5979787224074726105e15), /*8.028576626982912e20/174611*/
+ Scalar(1.8152105401943546773e17), /*1.5511210043330985984e23/854513*/
+ Scalar(-7.1661652561756670113e18) /*1.6938241367317436694528e27/236364091*/
+ };
+
+ const Scalar maxnum = NumTraits<Scalar>::infinity();
+ const Scalar zero = 0.0, half = 0.5, one = 1.0;
+ const Scalar machep = cephes_helper<Scalar>::machep();
+ const Scalar nan = NumTraits<Scalar>::quiet_NaN();
+
+ if( x == one )
+ return maxnum;
+
+ if( x < one )
+ {
+ return nan;
+ }
+
+ if( q <= zero )
+ {
+ if(q == numext::floor(q))
+ {
+ return maxnum;
+ }
+ p = x;
+ r = numext::floor(p);
+ if (p != r)
+ return nan;
+ }
+
+    /* Permit negative q but continue the sum until n+q > +9.
+ * This case should be handled by a reflection formula.
+ * If q<0 and x is an integer, there is a relation to
+ * the polygamma function.
+ */
+ s = numext::pow( q, -x );
+ a = q;
+ b = zero;
+    // Run the summation in a helper function specialized for the floating-point type.
+ if (zeta_impl_series<Scalar>::run(a, b, s, x, machep)) {
+ return s;
+ }
+
+ w = a;
+ s += b*w/(x-one);
+ s -= half * b;
+ a = one;
+ k = zero;
+ for( i=0; i<12; i++ )
+ {
+ a *= x + k;
+ b /= w;
+ t = a*b/A[i];
+ s = s + t;
+ t = numext::abs(t/s);
+ if( t < machep ) {
+ break;
+ }
+ k += one;
+ a *= x + k;
+ b /= w;
+ k += one;
+ }
+ return s;
+ }
+};
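+
+// Spot checks against the ordinary Riemann zeta function (q == 1):
+//   zeta(2, 1) == pi^2 / 6  == 1.6449340668482264...
+//   zeta(4, 1) == pi^4 / 90 == 1.0823232337111382...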
+
+/****************************************************************************
+ * Implementation of polygamma function, requires C++11/C99 *
+ ****************************************************************************/
+
+template <typename Scalar>
+struct polygamma_retval {
+ typedef Scalar type;
+};
+
+#if !EIGEN_HAS_C99_MATH
+
+template <typename Scalar>
+struct polygamma_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(Scalar n, Scalar x) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+#else
+
+template <typename Scalar>
+struct polygamma_impl {
+ EIGEN_DEVICE_FUNC
+ static Scalar run(Scalar n, Scalar x) {
+ Scalar zero = 0.0, one = 1.0;
+ Scalar nplus = n + one;
+ const Scalar nan = NumTraits<Scalar>::quiet_NaN();
+
+ // Check that n is an integer
+ if (numext::floor(n) != n) {
+ return nan;
+ }
+    // For n == 0, polygamma reduces to the digamma function.
+ else if (n == zero) {
+ return digamma_impl<Scalar>::run(x);
+ }
+ // Use the same implementation as scipy
+ else {
+ Scalar factorial = numext::exp(lgamma_impl<Scalar>::run(nplus));
+ return numext::pow(-one, nplus) * factorial * zeta_impl<Scalar>::run(nplus, x);
+ }
+ }
+};
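+
+// The branch above relies on the identity
+//   psi^(n)(x) = (-1)^(n+1) * n! * zeta(n+1, x),
+// so, for instance, polygamma(1, 1) == zeta(2, 1) == pi^2 / 6.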
+
+#endif // EIGEN_HAS_C99_MATH
+
+/************************************************************************************************
+ * Implementation of betainc (incomplete beta integral), based on Cephes but requires C++11/C99 *
+ ************************************************************************************************/
+
+template <typename Scalar>
+struct betainc_retval {
+ typedef Scalar type;
+};
+
+#if !EIGEN_HAS_C99_MATH
+
+template <typename Scalar>
+struct betainc_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar b, Scalar x) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+#else
+
+template <typename Scalar>
+struct betainc_impl {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(Scalar, Scalar, Scalar) {
+ /* betaincf.c
+ *
+ * Incomplete beta integral
+ *
+ *
+ * SYNOPSIS:
+ *
+ * float a, b, x, y, betaincf();
+ *
+ * y = betaincf( a, b, x );
+ *
+ *
+ * DESCRIPTION:
+ *
+ * Returns incomplete beta integral of the arguments, evaluated
+ * from zero to x. The function is defined as
+ *
+ * x
+ * - -
+ * | (a+b) | | a-1 b-1
+ * ----------- | t (1-t) dt.
+ * - - | |
+ * | (a) | (b) -
+ * 0
+ *
+ * The domain of definition is 0 <= x <= 1. In this
+ * implementation a and b are restricted to positive values.
+ * The integral from x to 1 may be obtained by the symmetry
+ * relation
+ *
+ * 1 - betainc( a, b, x ) = betainc( b, a, 1-x ).
+ *
+ * The integral is evaluated by a continued fraction expansion.
+ * If a < 1, the function calls itself recursively after a
+ * transformation to increase a to a+1.
+ *
+ * ACCURACY (float):
+ *
+ * Tested at random points (a,b,x) with a and b in the indicated
+ * interval and x between 0 and 1.
+ *
+ * arithmetic domain # trials peak rms
+ * Relative error:
+ * IEEE 0,30 10000 3.7e-5 5.1e-6
+ * IEEE 0,100 10000 1.7e-4 2.5e-5
+ * The useful domain for relative error is limited by underflow
+ * of the single precision exponential function.
+ * Absolute error:
+ * IEEE 0,30 100000 2.2e-5 9.6e-7
+ * IEEE 0,100 10000 6.5e-5 3.7e-6
+ *
+ * Larger errors may occur for extreme ratios of a and b.
+ *
+ * ACCURACY (double):
+ * arithmetic domain # trials peak rms
+ * IEEE 0,5 10000 6.9e-15 4.5e-16
+ * IEEE 0,85 250000 2.2e-13 1.7e-14
+ * IEEE 0,1000 30000 5.3e-12 6.3e-13
+ * IEEE 0,10000 250000 9.3e-11 7.1e-12
+ * IEEE 0,100000 10000 8.7e-10 4.8e-11
+ * Outputs smaller than the IEEE gradual underflow threshold
+ * were excluded from these statistics.
+ *
+ * ERROR MESSAGES:
+ * message condition value returned
+ * incbet domain x<0, x>1 nan
+ * incbet underflow nan
+ */
+
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, Scalar>::value == false),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ return Scalar(0);
+ }
+};
+
+/* Continued fraction expansion #1 for the incomplete beta integral (small_branch == true)
+ * Continued fraction expansion #2 for the incomplete beta integral (small_branch == false)
+ */
+template <typename Scalar>
+struct incbeta_cfe {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE Scalar run(Scalar a, Scalar b, Scalar x, bool small_branch) {
+ EIGEN_STATIC_ASSERT((internal::is_same<Scalar, float>::value ||
+ internal::is_same<Scalar, double>::value),
+ THIS_TYPE_IS_NOT_SUPPORTED);
+ const Scalar big = cephes_helper<Scalar>::big();
+ const Scalar machep = cephes_helper<Scalar>::machep();
+ const Scalar biginv = cephes_helper<Scalar>::biginv();
+
+ const Scalar zero = 0;
+ const Scalar one = 1;
+ const Scalar two = 2;
+
+ Scalar xk, pk, pkm1, pkm2, qk, qkm1, qkm2;
+ Scalar k1, k2, k3, k4, k5, k6, k7, k8, k26update;
+ Scalar ans;
+ int n;
+
+ const int num_iters = (internal::is_same<Scalar, float>::value) ? 100 : 300;
+ const Scalar thresh =
+ (internal::is_same<Scalar, float>::value) ? machep : Scalar(3) * machep;
+ Scalar r = (internal::is_same<Scalar, float>::value) ? zero : one;
+
+ if (small_branch) {
+ k1 = a;
+ k2 = a + b;
+ k3 = a;
+ k4 = a + one;
+ k5 = one;
+ k6 = b - one;
+ k7 = k4;
+ k8 = a + two;
+ k26update = one;
+ } else {
+ k1 = a;
+ k2 = b - one;
+ k3 = a;
+ k4 = a + one;
+ k5 = one;
+ k6 = a + b;
+ k7 = a + one;
+ k8 = a + two;
+ k26update = -one;
+ x = x / (one - x);
+ }
+
+ pkm2 = zero;
+ qkm2 = one;
+ pkm1 = one;
+ qkm1 = one;
+ ans = one;
+ n = 0;
+
+ do {
+ xk = -(x * k1 * k2) / (k3 * k4);
+ pk = pkm1 + pkm2 * xk;
+ qk = qkm1 + qkm2 * xk;
+ pkm2 = pkm1;
+ pkm1 = pk;
+ qkm2 = qkm1;
+ qkm1 = qk;
+
+ xk = (x * k5 * k6) / (k7 * k8);
+ pk = pkm1 + pkm2 * xk;
+ qk = qkm1 + qkm2 * xk;
+ pkm2 = pkm1;
+ pkm1 = pk;
+ qkm2 = qkm1;
+ qkm1 = qk;
+
+ if (qk != zero) {
+ r = pk / qk;
+ if (numext::abs(ans - r) < numext::abs(r) * thresh) {
+ return r;
+ }
+ ans = r;
+ }
+
+ k1 += one;
+ k2 += k26update;
+ k3 += two;
+ k4 += two;
+ k5 += one;
+ k6 -= k26update;
+ k7 += two;
+ k8 += two;
+
+ if ((numext::abs(qk) + numext::abs(pk)) > big) {
+ pkm2 *= biginv;
+ pkm1 *= biginv;
+ qkm2 *= biginv;
+ qkm1 *= biginv;
+ }
+ if ((numext::abs(qk) < biginv) || (numext::abs(pk) < biginv)) {
+ pkm2 *= big;
+ pkm1 *= big;
+ qkm2 *= big;
+ qkm1 *= big;
+ }
+ } while (++n < num_iters);
+
+ return ans;
+ }
+};
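+
+// Note on the big/biginv rescaling above: scaling the pk and qk recurrence
+// terms jointly leaves the convergent pk / qk unchanged while keeping the
+// recurrence within floating-point range.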
+
+/* Helper functions depending on the Scalar type */
+template <typename Scalar>
+struct betainc_helper {};
+
+template <>
+struct betainc_helper<float> {
+  /* Core implementation; assumes a is large (> 1.0). */
+ EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE float incbsa(float aa, float bb,
+ float xx) {
+ float ans, a, b, t, x, onemx;
+ bool reversed_a_b = false;
+
+ onemx = 1.0f - xx;
+
+ /* see if x is greater than the mean */
+ if (xx > (aa / (aa + bb))) {
+ reversed_a_b = true;
+ a = bb;
+ b = aa;
+ t = xx;
+ x = onemx;
+ } else {
+ a = aa;
+ b = bb;
+ t = onemx;
+ x = xx;
+ }
+
+ /* Choose expansion for optimal convergence */
+ if (b > 10.0f) {
+ if (numext::abs(b * x / a) < 0.3f) {
+ t = betainc_helper<float>::incbps(a, b, x);
+ if (reversed_a_b) t = 1.0f - t;
+ return t;
+ }
+ }
+
+ ans = x * (a + b - 2.0f) / (a - 1.0f);
+ if (ans < 1.0f) {
+ ans = incbeta_cfe<float>::run(a, b, x, true /* small_branch */);
+ t = b * numext::log(t);
+ } else {
+ ans = incbeta_cfe<float>::run(a, b, x, false /* small_branch */);
+ t = (b - 1.0f) * numext::log(t);
+ }
+
+ t += a * numext::log(x) + lgamma_impl<float>::run(a + b) -
+ lgamma_impl<float>::run(a) - lgamma_impl<float>::run(b);
+ t += numext::log(ans / a);
+ t = numext::exp(t);
+
+ if (reversed_a_b) t = 1.0f - t;
+ return t;
+ }
+
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE float incbps(float a, float b, float x) {
+ float t, u, y, s;
+ const float machep = cephes_helper<float>::machep();
+
+ y = a * numext::log(x) + (b - 1.0f) * numext::log1p(-x) - numext::log(a);
+ y -= lgamma_impl<float>::run(a) + lgamma_impl<float>::run(b);
+ y += lgamma_impl<float>::run(a + b);
+
+ t = x / (1.0f - x);
+ s = 0.0f;
+ u = 1.0f;
+ do {
+ b -= 1.0f;
+ if (b == 0.0f) {
+ break;
+ }
+ a += 1.0f;
+ u *= t * b / a;
+ s += u;
+ } while (numext::abs(u) > machep);
+
+ return numext::exp(y) * (1.0f + s);
+ }
+};
+
+template <>
+struct betainc_impl<float> {
+ EIGEN_DEVICE_FUNC
+ static float run(float a, float b, float x) {
+ const float nan = NumTraits<float>::quiet_NaN();
+ float ans, t;
+
+ if (a <= 0.0f) return nan;
+ if (b <= 0.0f) return nan;
+ if ((x <= 0.0f) || (x >= 1.0f)) {
+ if (x == 0.0f) return 0.0f;
+ if (x == 1.0f) return 1.0f;
+ // mtherr("betaincf", DOMAIN);
+ return nan;
+ }
+
+ /* transformation for small aa */
+ if (a <= 1.0f) {
+ ans = betainc_helper<float>::incbsa(a + 1.0f, b, x);
+ t = a * numext::log(x) + b * numext::log1p(-x) +
+ lgamma_impl<float>::run(a + b) - lgamma_impl<float>::run(a + 1.0f) -
+ lgamma_impl<float>::run(b);
+ return (ans + numext::exp(t));
+ } else {
+ return betainc_helper<float>::incbsa(a, b, x);
+ }
+ }
+};
+
+template <>
+struct betainc_helper<double> {
+ EIGEN_DEVICE_FUNC
+ static EIGEN_STRONG_INLINE double incbps(double a, double b, double x) {
+ const double machep = cephes_helper<double>::machep();
+
+ double s, t, u, v, n, t1, z, ai;
+
+ ai = 1.0 / a;
+ u = (1.0 - b) * x;
+ v = u / (a + 1.0);
+ t1 = v;
+ t = u;
+ n = 2.0;
+ s = 0.0;
+ z = machep * ai;
+ while (numext::abs(v) > z) {
+ u = (n - b) * x / n;
+ t *= u;
+ v = t / (a + n);
+ s += v;
+ n += 1.0;
+ }
+ s += t1;
+ s += ai;
+
+ u = a * numext::log(x);
+ // TODO: gamma() is not directly implemented in Eigen.
+ /*
+ if ((a + b) < maxgam && numext::abs(u) < maxlog) {
+ t = gamma(a + b) / (gamma(a) * gamma(b));
+ s = s * t * pow(x, a);
+ } else {
+ */
+ t = lgamma_impl<double>::run(a + b) - lgamma_impl<double>::run(a) -
+ lgamma_impl<double>::run(b) + u + numext::log(s);
+    return numext::exp(t);
+ }
+};
+
+template <>
+struct betainc_impl<double> {
+ EIGEN_DEVICE_FUNC
+ static double run(double aa, double bb, double xx) {
+ const double nan = NumTraits<double>::quiet_NaN();
+ const double machep = cephes_helper<double>::machep();
+ // const double maxgam = 171.624376956302725;
+
+ double a, b, t, x, xc, w, y;
+ bool reversed_a_b = false;
+
+ if (aa <= 0.0 || bb <= 0.0) {
+      return nan;  // domain error
+ }
+
+ if ((xx <= 0.0) || (xx >= 1.0)) {
+ if (xx == 0.0) return (0.0);
+ if (xx == 1.0) return (1.0);
+ // mtherr("incbet", DOMAIN);
+ return nan;
+ }
+
+ if ((bb * xx) <= 1.0 && xx <= 0.95) {
+ return betainc_helper<double>::incbps(aa, bb, xx);
+ }
+
+ w = 1.0 - xx;
+
+ /* Reverse a and b if x is greater than the mean. */
+ if (xx > (aa / (aa + bb))) {
+ reversed_a_b = true;
+ a = bb;
+ b = aa;
+ xc = xx;
+ x = w;
+ } else {
+ a = aa;
+ b = bb;
+ xc = w;
+ x = xx;
+ }
+
+ if (reversed_a_b && (b * x) <= 1.0 && x <= 0.95) {
+ t = betainc_helper<double>::incbps(a, b, x);
+ if (t <= machep) {
+ t = 1.0 - machep;
+ } else {
+ t = 1.0 - t;
+ }
+ return t;
+ }
+
+ /* Choose expansion for better convergence. */
+ y = x * (a + b - 2.0) - (a - 1.0);
+ if (y < 0.0) {
+ w = incbeta_cfe<double>::run(a, b, x, true /* small_branch */);
+ } else {
+ w = incbeta_cfe<double>::run(a, b, x, false /* small_branch */) / xc;
+ }
+
+ /* Multiply w by the factor
+ a b _ _ _
+ x (1-x) | (a+b) / ( a | (a) | (b) ) . */
+
+ y = a * numext::log(x);
+ t = b * numext::log(xc);
+ // TODO: gamma is not directly implemented in Eigen.
+ /*
+ if ((a + b) < maxgam && numext::abs(y) < maxlog && numext::abs(t) < maxlog)
+ {
+ t = pow(xc, b);
+ t *= pow(x, a);
+ t /= a;
+ t *= w;
+ t *= gamma(a + b) / (gamma(a) * gamma(b));
+ } else {
+ */
+ /* Resort to logarithms. */
+ y += t + lgamma_impl<double>::run(a + b) - lgamma_impl<double>::run(a) -
+ lgamma_impl<double>::run(b);
+ y += numext::log(w / a);
+ t = numext::exp(y);
+
+ /* } */
+ // done:
+
+ if (reversed_a_b) {
+ if (t <= machep) {
+ t = 1.0 - machep;
+ } else {
+ t = 1.0 - t;
+ }
+ }
+ return t;
+ }
+};
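+
+// Spot checks for the double-precision path above:
+//   betainc(1, 1, x) == x for x in [0, 1]  (Beta(1,1) is the uniform CDF)
+//   betainc(a, b, x) == 1 - betainc(b, a, 1 - x)  (the symmetry relation)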
+
+#endif // EIGEN_HAS_C99_MATH
+
+} // end namespace internal
+
+namespace numext {
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(lgamma, Scalar)
+ lgamma(const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(lgamma, Scalar)::run(x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(digamma, Scalar)
+ digamma(const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(digamma, Scalar)::run(x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(zeta, Scalar)
+zeta(const Scalar& x, const Scalar& q) {
+ return EIGEN_MATHFUNC_IMPL(zeta, Scalar)::run(x, q);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(polygamma, Scalar)
+polygamma(const Scalar& n, const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(polygamma, Scalar)::run(n, x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erf, Scalar)
+ erf(const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(erf, Scalar)::run(x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(erfc, Scalar)
+ erfc(const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(erfc, Scalar)::run(x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igamma, Scalar)
+ igamma(const Scalar& a, const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(igamma, Scalar)::run(a, x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(igammac, Scalar)
+ igammac(const Scalar& a, const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(igammac, Scalar)::run(a, x);
+}
+
+template <typename Scalar>
+EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(betainc, Scalar)
+ betainc(const Scalar& a, const Scalar& b, const Scalar& x) {
+ return EIGEN_MATHFUNC_IMPL(betainc, Scalar)::run(a, b, x);
+}
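+
+// Example usage of the scalar entry points above (they are EIGEN_DEVICE_FUNC,
+// so they can be called from both host and CUDA device code):
+//   double g = Eigen::numext::lgamma(3.5);
+//   double p = Eigen::numext::igamma(0.5, 2.0);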
+
+} // end namespace numext
+
+
+} // end namespace Eigen
+
+#endif // EIGEN_SPECIAL_FUNCTIONS_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
new file mode 100644
index 000000000..46d60d323
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsPacketMath.h
@@ -0,0 +1,58 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_SPECIALFUNCTIONS_PACKETMATH_H
+#define EIGEN_SPECIALFUNCTIONS_PACKETMATH_H
+
+namespace Eigen {
+
+namespace internal {
+
+/** \internal \returns the ln(|gamma(\a a)|) (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet plgamma(const Packet& a) { using numext::lgamma; return lgamma(a); }
+
+/** \internal \returns the derivative of lgamma, psi(\a a) (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pdigamma(const Packet& a) { using numext::digamma; return digamma(a); }
+
+/** \internal \returns the zeta function of two arguments (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet pzeta(const Packet& x, const Packet& q) { using numext::zeta; return zeta(x, q); }
+
+/** \internal \returns the polygamma function (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet ppolygamma(const Packet& n, const Packet& x) { using numext::polygamma; return polygamma(n, x); }
+
+/** \internal \returns the erf(\a a) (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet perf(const Packet& a) { using numext::erf; return erf(a); }
+
+/** \internal \returns the erfc(\a a) (coeff-wise) */
+template<typename Packet> EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
+Packet perfc(const Packet& a) { using numext::erfc; return erfc(a); }
+
+/** \internal \returns the incomplete gamma function igamma(\a a, \a x) */
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+Packet pigamma(const Packet& a, const Packet& x) { using numext::igamma; return igamma(a, x); }
+
+/** \internal \returns the complementary incomplete gamma function igammac(\a a, \a x) */
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+Packet pigammac(const Packet& a, const Packet& x) { using numext::igammac; return igammac(a, x); }
+
+/** \internal \returns the incomplete beta function betainc(\a a, \a b, \a x) */
+template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+Packet pbetainc(const Packet& a, const Packet& b, const Packet& x) { using numext::betainc; return betainc(a, b, x); }
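+
+// The generic definitions above forward to the scalar numext routines; vector
+// packet types are expected to provide their own specializations, such as the
+// CUDA float4/double2 overloads in arch/CUDA/CudaSpecialFunctions.h.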
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_SPECIALFUNCTIONS_PACKETMATH_H
+
diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h
new file mode 100644
index 000000000..0ef440495
--- /dev/null
+++ b/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h
@@ -0,0 +1,165 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CUDA_SPECIALFUNCTIONS_H
+#define EIGEN_CUDA_SPECIALFUNCTIONS_H
+
+namespace Eigen {
+
+namespace internal {
+
+// Make sure this is only available when targeting a GPU: we don't want to
+// introduce conflicts between these packet function specializations and the
+// ones we'll use on the host side (SSE, AVX, ...)
+#if defined(__CUDACC__) && defined(EIGEN_USE_GPU)
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 plgamma<float4>(const float4& a)
+{
+ return make_float4(lgammaf(a.x), lgammaf(a.y), lgammaf(a.z), lgammaf(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 plgamma<double2>(const double2& a)
+{
+ using numext::lgamma;
+ return make_double2(lgamma(a.x), lgamma(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pdigamma<float4>(const float4& a)
+{
+ using numext::digamma;
+ return make_float4(digamma(a.x), digamma(a.y), digamma(a.z), digamma(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pdigamma<double2>(const double2& a)
+{
+ using numext::digamma;
+ return make_double2(digamma(a.x), digamma(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pzeta<float4>(const float4& x, const float4& q)
+{
+ using numext::zeta;
+ return make_float4(zeta(x.x, q.x), zeta(x.y, q.y), zeta(x.z, q.z), zeta(x.w, q.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pzeta<double2>(const double2& x, const double2& q)
+{
+ using numext::zeta;
+ return make_double2(zeta(x.x, q.x), zeta(x.y, q.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 ppolygamma<float4>(const float4& n, const float4& x)
+{
+ using numext::polygamma;
+ return make_float4(polygamma(n.x, x.x), polygamma(n.y, x.y), polygamma(n.z, x.z), polygamma(n.w, x.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 ppolygamma<double2>(const double2& n, const double2& x)
+{
+ using numext::polygamma;
+ return make_double2(polygamma(n.x, x.x), polygamma(n.y, x.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 perf<float4>(const float4& a)
+{
+ return make_float4(erff(a.x), erff(a.y), erff(a.z), erff(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 perf<double2>(const double2& a)
+{
+ using numext::erf;
+ return make_double2(erf(a.x), erf(a.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 perfc<float4>(const float4& a)
+{
+  // erfcf is the CUDA device math intrinsic from the global namespace.
+  return make_float4(erfcf(a.x), erfcf(a.y), erfcf(a.z), erfcf(a.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 perfc<double2>(const double2& a)
+{
+ using numext::erfc;
+ return make_double2(erfc(a.x), erfc(a.y));
+}
+
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pigamma<float4>(const float4& a, const float4& x)
+{
+ using numext::igamma;
+ return make_float4(
+ igamma(a.x, x.x),
+ igamma(a.y, x.y),
+ igamma(a.z, x.z),
+ igamma(a.w, x.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pigamma<double2>(const double2& a, const double2& x)
+{
+ using numext::igamma;
+ return make_double2(igamma(a.x, x.x), igamma(a.y, x.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pigammac<float4>(const float4& a, const float4& x)
+{
+ using numext::igammac;
+ return make_float4(
+ igammac(a.x, x.x),
+ igammac(a.y, x.y),
+ igammac(a.z, x.z),
+ igammac(a.w, x.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pigammac<double2>(const double2& a, const double2& x)
+{
+ using numext::igammac;
+ return make_double2(igammac(a.x, x.x), igammac(a.y, x.y));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+float4 pbetainc<float4>(const float4& a, const float4& b, const float4& x)
+{
+ using numext::betainc;
+ return make_float4(
+ betainc(a.x, b.x, x.x),
+ betainc(a.y, b.y, x.y),
+ betainc(a.z, b.z, x.z),
+ betainc(a.w, b.w, x.w));
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+double2 pbetainc<double2>(const double2& a, const double2& b, const double2& x)
+{
+ using numext::betainc;
+ return make_double2(betainc(a.x, b.x, x.x), betainc(a.y, b.y, x.y));
+}
+
+#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_CUDA_SPECIALFUNCTIONS_H
diff --git a/unsupported/Eigen/src/Splines/Spline.h b/unsupported/Eigen/src/Splines/Spline.h
index ddcddfc9a..627f6e482 100644
--- a/unsupported/Eigen/src/Splines/Spline.h
+++ b/unsupported/Eigen/src/Splines/Spline.h
@@ -94,7 +94,7 @@ namespace Eigen
const KnotVectorType& knots() const { return m_knots; }
/**
- * \brief Returns the knots of the underlying spline.
+ * \brief Returns the control points (ctrls) of the underlying spline.
**/
const ControlPointVectorType& ctrls() const { return m_ctrls; }
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index c9a70d7a7..6188b421a 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -109,6 +109,7 @@ ei_add_test(gmres)
ei_add_test(minres)
ei_add_test(levenberg_marquardt)
ei_add_test(kronecker_product)
+ei_add_test(special_functions)
# TODO: The following test names are prefixed with the cxx11 string, since historically
# the tests depended on c++11. This isn't the case anymore so we ought to rename them.
@@ -116,7 +117,6 @@ ei_add_test(kronecker_product)
# when using visual studio. We should make the check more strict to enable the tests for
# newer versions of MSVC.
if (NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
-ei_add_test(cxx11_float16)
ei_add_test(cxx11_tensor_dimension)
ei_add_test(cxx11_tensor_map)
ei_add_test(cxx11_tensor_assign)
@@ -191,10 +191,12 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
# Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor
# and -fno-check-new flags since they trigger thousands of compilation warnings
# in the CUDA runtime
  + # Also remove -ansi, which is incompatible with -std=c++11.
string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+ string(REPLACE "-ansi" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
message(STATUS "Flags used to compile cuda code: " ${CMAKE_CXX_FLAGS})
@@ -210,7 +212,14 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
set(EIGEN_CUDA_RELAXED_CONSTEXPR "--relaxed-constexpr")
endif()
- set(CUDA_NVCC_FLAGS "-std=c++11 ${EIGEN_CUDA_RELAXED_CONSTEXPR} -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\"")
+ if( (NOT EIGEN_TEST_CXX11) OR (CMAKE_VERSION VERSION_LESS 3.3))
+ set(EIGEN_CUDA_CXX11_FLAG "-std=c++11")
+ else()
+ # otherwise the flag has already been added because of the above set(CMAKE_CXX_STANDARD 11)
+ set(EIGEN_CUDA_CXX11_FLAG "")
+ endif()
+
+ set(CUDA_NVCC_FLAGS "${EIGEN_CUDA_CXX11_FLAG} ${EIGEN_CUDA_RELAXED_CONSTEXPR} -arch compute_${EIGEN_CUDA_COMPUTE_ARCH} -Xcudafe \"--display_error_number\" ${CUDA_NVCC_FLAGS}")
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
diff --git a/unsupported/test/FFTW.cpp b/unsupported/test/FFTW.cpp
index 1dd6dc97d..8b7528fb7 100644
--- a/unsupported/test/FFTW.cpp
+++ b/unsupported/test/FFTW.cpp
@@ -18,11 +18,11 @@ using namespace Eigen;
template < typename T>
-complex<long double> promote(complex<T> x) { return complex<long double>(x.real(),x.imag()); }
+complex<long double> promote(complex<T> x) { return complex<long double>((long double)x.real(),(long double)x.imag()); }
-complex<long double> promote(float x) { return complex<long double>( x); }
-complex<long double> promote(double x) { return complex<long double>( x); }
-complex<long double> promote(long double x) { return complex<long double>( x); }
+complex<long double> promote(float x) { return complex<long double>((long double)x); }
+complex<long double> promote(double x) { return complex<long double>((long double)x); }
+complex<long double> promote(long double x) { return complex<long double>((long double)x); }
template <typename VT1,typename VT2>
@@ -33,7 +33,7 @@ complex<long double> promote(long double x) { return complex<long double>( x);
long double pi = acos((long double)-1 );
for (size_t k0=0;k0<(size_t)fftbuf.size();++k0) {
complex<long double> acc = 0;
- long double phinc = -2.*k0* pi / timebuf.size();
+ long double phinc = (long double)(-2.)*k0* pi / timebuf.size();
for (size_t k1=0;k1<(size_t)timebuf.size();++k1) {
acc += promote( timebuf[k1] ) * exp( complex<long double>(0,k1*phinc) );
}
@@ -54,8 +54,8 @@ complex<long double> promote(long double x) { return complex<long double>( x);
long double difpower=0;
size_t n = (min)( buf1.size(),buf2.size() );
for (size_t k=0;k<n;++k) {
- totalpower += (numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2;
- difpower += numext::abs2(buf1[k] - buf2[k]);
+ totalpower += (long double)((numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2);
+ difpower += (long double)(numext::abs2(buf1[k] - buf2[k]));
}
return sqrt(difpower/totalpower);
}
@@ -93,19 +93,19 @@ void test_scalar_generic(int nfft)
fft.SetFlag(fft.HalfSpectrum );
fft.fwd( freqBuf,tbuf);
VERIFY((size_t)freqBuf.size() == (size_t)( (nfft>>1)+1) );
- VERIFY( fft_rmse(freqBuf,tbuf) < test_precision<T>() );// gross check
+ VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check
fft.ClearFlag(fft.HalfSpectrum );
fft.fwd( freqBuf,tbuf);
VERIFY( (size_t)freqBuf.size() == (size_t)nfft);
- VERIFY( fft_rmse(freqBuf,tbuf) < test_precision<T>() );// gross check
+ VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check
if (nfft&1)
return; // odd FFTs get the wrong size inverse FFT
ScalarVector tbuf2;
fft.inv( tbuf2 , freqBuf);
- VERIFY( dif_rmse(tbuf,tbuf2) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check
// verify that the Unscaled flag takes effect
@@ -121,12 +121,12 @@ void test_scalar_generic(int nfft)
//for (size_t i=0;i<(size_t) tbuf.size();++i)
// cout << "freqBuf=" << freqBuf[i] << " in2=" << tbuf3[i] << " - in=" << tbuf[i] << " => " << (tbuf3[i] - tbuf[i] ) << endl;
- VERIFY( dif_rmse(tbuf,tbuf3) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(tbuf,tbuf3)) < test_precision<T>() );// gross check
// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv( tbuf2 , freqBuf);
- VERIFY( dif_rmse(tbuf,tbuf2) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check
}
template <typename T>
@@ -152,10 +152,10 @@ void test_complex_generic(int nfft)
inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) );
fft.fwd( outbuf , inbuf);
- VERIFY( fft_rmse(outbuf,inbuf) < test_precision<T>() );// gross check
+ VERIFY( T(fft_rmse(outbuf,inbuf)) < test_precision<T>() );// gross check
fft.inv( buf3 , outbuf);
- VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check
// verify that the Unscaled flag takes effect
ComplexVector buf4;
@@ -163,12 +163,12 @@ void test_complex_generic(int nfft)
fft.inv( buf4 , outbuf);
for (int k=0;k<nfft;++k)
buf4[k] *= T(1./nfft);
- VERIFY( dif_rmse(inbuf,buf4) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(inbuf,buf4)) < test_precision<T>() );// gross check
// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv( buf3 , outbuf);
- VERIFY( dif_rmse(inbuf,buf3) < test_precision<T>() );// gross check
+ VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check
}
template <typename T>
diff --git a/unsupported/test/autodiff.cpp b/unsupported/test/autodiff.cpp
index b59fd1c43..2da6dd8f3 100644
--- a/unsupported/test/autodiff.cpp
+++ b/unsupported/test/autodiff.cpp
@@ -205,6 +205,10 @@ void test_autodiff_hessian()
VERIFY_IS_APPROX(y.value().derivatives()(1), s4*std::cos(s1*s3+s2*s4));
VERIFY_IS_APPROX(y.derivatives()(0).derivatives(), -std::sin(s1*s3+s2*s4)*Vector2d(s3*s3,s4*s3));
VERIFY_IS_APPROX(y.derivatives()(1).derivatives(), -std::sin(s1*s3+s2*s4)*Vector2d(s3*s4,s4*s4));
+
+ ADD z = x(0)*x(1);
+ VERIFY_IS_APPROX(z.derivatives()(0).derivatives(), Vector2d(0,1));
+ VERIFY_IS_APPROX(z.derivatives()(1).derivatives(), Vector2d(1,0));
}
double bug_1222() {
@@ -234,6 +238,32 @@ double bug_1223() {
return t.value() + t2.value();
}
+// regression test for some compilation issues with specializations of ScalarBinaryOpTraits
+void bug_1260() {
+ Matrix4d A;
+ Vector4d v;
+ A*v;
+}
+
+// check a compilation issue with numext::max
+double bug_1261() {
+ typedef AutoDiffScalar<Matrix2d> AD;
+ typedef Matrix<AD,2,1> VectorAD;
+
+ VectorAD v;
+ const AD maxVal = v.maxCoeff();
+ const AD minVal = v.minCoeff();
+ return maxVal.value() + minVal.value();
+}
+
+double bug_1264() {
+ typedef AutoDiffScalar<Vector2d> AD;
+ const AD s;
+ const Matrix<AD, 3, 1> v1;
+ const Matrix<AD, 3, 1> v2 = (s + 3.0) * v1;
+ return v2(0).value();
+}
+
void test_autodiff()
{
for(int i = 0; i < g_repeat; i++) {
@@ -245,5 +275,7 @@ void test_autodiff()
bug_1222();
bug_1223();
+ bug_1260();
  + bug_1261();
  + bug_1264();
}
diff --git a/unsupported/test/cxx11_float16.cpp b/unsupported/test/cxx11_float16.cpp
deleted file mode 100644
index e39a7f83c..000000000
--- a/unsupported/test/cxx11_float16.cpp
+++ /dev/null
@@ -1,203 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_float16
-
-#include "main.h"
-#include <Eigen/src/Core/arch/CUDA/Half.h>
-
-using Eigen::half;
-
-void test_conversion()
-{
- // Conversion from float.
- VERIFY_IS_EQUAL(half(1.0f).x, 0x3c00);
- VERIFY_IS_EQUAL(half(0.5f).x, 0x3800);
- VERIFY_IS_EQUAL(half(0.33333f).x, 0x3555);
- VERIFY_IS_EQUAL(half(0.0f).x, 0x0000);
- VERIFY_IS_EQUAL(half(-0.0f).x, 0x8000);
- VERIFY_IS_EQUAL(half(65504.0f).x, 0x7bff);
- VERIFY_IS_EQUAL(half(65536.0f).x, 0x7c00); // Becomes infinity.
-
- // Denormals.
- VERIFY_IS_EQUAL(half(-5.96046e-08f).x, 0x8001);
- VERIFY_IS_EQUAL(half(5.96046e-08f).x, 0x0001);
- VERIFY_IS_EQUAL(half(1.19209e-07f).x, 0x0002);
-
- // Verify round-to-nearest-even behavior.
- float val1 = float(half(__half(0x3c00)));
- float val2 = float(half(__half(0x3c01)));
- float val3 = float(half(__half(0x3c02)));
- VERIFY_IS_EQUAL(half(0.5f * (val1 + val2)).x, 0x3c00);
- VERIFY_IS_EQUAL(half(0.5f * (val2 + val3)).x, 0x3c02);
-
- // Conversion from int.
- VERIFY_IS_EQUAL(half(-1).x, 0xbc00);
- VERIFY_IS_EQUAL(half(0).x, 0x0000);
- VERIFY_IS_EQUAL(half(1).x, 0x3c00);
- VERIFY_IS_EQUAL(half(2).x, 0x4000);
- VERIFY_IS_EQUAL(half(3).x, 0x4200);
-
- // Conversion from bool.
- VERIFY_IS_EQUAL(half(false).x, 0x0000);
- VERIFY_IS_EQUAL(half(true).x, 0x3c00);
-
- // Conversion to float.
- VERIFY_IS_EQUAL(float(half(__half(0x0000))), 0.0f);
- VERIFY_IS_EQUAL(float(half(__half(0x3c00))), 1.0f);
-
- // Denormals.
- VERIFY_IS_APPROX(float(half(__half(0x8001))), -5.96046e-08f);
- VERIFY_IS_APPROX(float(half(__half(0x0001))), 5.96046e-08f);
- VERIFY_IS_APPROX(float(half(__half(0x0002))), 1.19209e-07f);
-
- // NaNs and infinities.
- VERIFY(!(numext::isinf)(float(half(65504.0f)))); // Largest finite number.
- VERIFY(!(numext::isnan)(float(half(0.0f))));
- VERIFY((numext::isinf)(float(half(__half(0xfc00)))));
- VERIFY((numext::isnan)(float(half(__half(0xfc01)))));
- VERIFY((numext::isinf)(float(half(__half(0x7c00)))));
- VERIFY((numext::isnan)(float(half(__half(0x7c01)))));
-
-#if !EIGEN_COMP_MSVC
- // Visual Studio errors out on divisions by 0
- VERIFY((numext::isnan)(float(half(0.0 / 0.0))));
- VERIFY((numext::isinf)(float(half(1.0 / 0.0))));
- VERIFY((numext::isinf)(float(half(-1.0 / 0.0))));
-#endif
-
- // Exactly same checks as above, just directly on the half representation.
- VERIFY(!(numext::isinf)(half(__half(0x7bff))));
- VERIFY(!(numext::isnan)(half(__half(0x0000))));
- VERIFY((numext::isinf)(half(__half(0xfc00))));
- VERIFY((numext::isnan)(half(__half(0xfc01))));
- VERIFY((numext::isinf)(half(__half(0x7c00))));
- VERIFY((numext::isnan)(half(__half(0x7c01))));
-
-#if !EIGEN_COMP_MSVC
- // Visual Studio errors out on divisions by 0
- VERIFY((numext::isnan)(half(0.0 / 0.0)));
- VERIFY((numext::isinf)(half(1.0 / 0.0)));
- VERIFY((numext::isinf)(half(-1.0 / 0.0)));
-#endif
-}
-
-void test_numtraits()
-{
- std::cout << "expsilin = " << NumTraits<half>::epsilon() << std::endl;
- std::cout << "highest = " << NumTraits<half>::highest() << std::endl;
- std::cout << "lowest = " << NumTraits<half>::lowest() << std::endl;
- std::cout << "inifinty = " << NumTraits<half>::infinity() << std::endl;
- std::cout << "nan = " << NumTraits<half>::quiet_NaN() << std::endl;
-
-}
-
-void test_arithmetic()
-{
- VERIFY_IS_EQUAL(float(half(2) + half(2)), 4);
- VERIFY_IS_EQUAL(float(half(2) + half(-2)), 0);
- VERIFY_IS_APPROX(float(half(0.33333f) + half(0.66667f)), 1.0f);
- VERIFY_IS_EQUAL(float(half(2.0f) * half(-5.5f)), -11.0f);
- VERIFY_IS_APPROX(float(half(1.0f) / half(3.0f)), 0.33333f);
- VERIFY_IS_EQUAL(float(-half(4096.0f)), -4096.0f);
- VERIFY_IS_EQUAL(float(-half(-4096.0f)), 4096.0f);
-}
-
-void test_comparison()
-{
- VERIFY(half(1.0f) > half(0.5f));
- VERIFY(half(0.5f) < half(1.0f));
- VERIFY(!(half(1.0f) < half(0.5f)));
- VERIFY(!(half(0.5f) > half(1.0f)));
-
- VERIFY(!(half(4.0f) > half(4.0f)));
- VERIFY(!(half(4.0f) < half(4.0f)));
-
- VERIFY(!(half(0.0f) < half(-0.0f)));
- VERIFY(!(half(-0.0f) < half(0.0f)));
- VERIFY(!(half(0.0f) > half(-0.0f)));
- VERIFY(!(half(-0.0f) > half(0.0f)));
-
- VERIFY(half(0.2f) > half(-1.0f));
- VERIFY(half(-1.0f) < half(0.2f));
- VERIFY(half(-16.0f) < half(-15.0f));
-
- VERIFY(half(1.0f) == half(1.0f));
- VERIFY(half(1.0f) != half(2.0f));
-
- // Comparisons with NaNs and infinities.
-#if !EIGEN_COMP_MSVC
- // Visual Studio errors out on divisions by 0
- VERIFY(!(half(0.0 / 0.0) == half(0.0 / 0.0)));
- VERIFY(half(0.0 / 0.0) != half(0.0 / 0.0));
-
- VERIFY(!(half(1.0) == half(0.0 / 0.0)));
- VERIFY(!(half(1.0) < half(0.0 / 0.0)));
- VERIFY(!(half(1.0) > half(0.0 / 0.0)));
- VERIFY(half(1.0) != half(0.0 / 0.0));
-
- VERIFY(half(1.0) < half(1.0 / 0.0));
- VERIFY(half(1.0) > half(-1.0 / 0.0));
-#endif
-}
-
-void test_basic_functions()
-{
- VERIFY_IS_EQUAL(float(numext::abs(half(3.5f))), 3.5f);
- VERIFY_IS_EQUAL(float(numext::abs(half(-3.5f))), 3.5f);
-
- VERIFY_IS_EQUAL(float(numext::floor(half(3.5f))), 3.0f);
- VERIFY_IS_EQUAL(float(numext::floor(half(-3.5f))), -4.0f);
-
- VERIFY_IS_EQUAL(float(numext::ceil(half(3.5f))), 4.0f);
- VERIFY_IS_EQUAL(float(numext::ceil(half(-3.5f))), -3.0f);
-
- VERIFY_IS_APPROX(float(numext::sqrt(half(0.0f))), 0.0f);
- VERIFY_IS_APPROX(float(numext::sqrt(half(4.0f))), 2.0f);
-
- VERIFY_IS_APPROX(float(numext::pow(half(0.0f), half(1.0f))), 0.0f);
- VERIFY_IS_APPROX(float(numext::pow(half(2.0f), half(2.0f))), 4.0f);
-
- VERIFY_IS_EQUAL(float(numext::exp(half(0.0f))), 1.0f);
- VERIFY_IS_APPROX(float(numext::exp(half(EIGEN_PI))), float(20.0 + EIGEN_PI));
-
- VERIFY_IS_EQUAL(float(numext::log(half(1.0f))), 0.0f);
- VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f);
-}
-
-void test_trigonometric_functions()
-{
- VERIFY_IS_APPROX(numext::cos(half(0.0f)), half(cosf(0.0f)));
- VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI)), half(cosf(EIGEN_PI)));
- //VERIFY_IS_APPROX(numext::cos(half(EIGEN_PI/2)), half(cosf(EIGEN_PI/2)));
- //VERIFY_IS_APPROX(numext::cos(half(3*EIGEN_PI/2)), half(cosf(3*EIGEN_PI/2)));
- VERIFY_IS_APPROX(numext::cos(half(3.5f)), half(cosf(3.5f)));
-
- VERIFY_IS_APPROX(numext::sin(half(0.0f)), half(sinf(0.0f)));
- // VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI)), half(sinf(EIGEN_PI)));
- VERIFY_IS_APPROX(numext::sin(half(EIGEN_PI/2)), half(sinf(EIGEN_PI/2)));
- VERIFY_IS_APPROX(numext::sin(half(3*EIGEN_PI/2)), half(sinf(3*EIGEN_PI/2)));
- VERIFY_IS_APPROX(numext::sin(half(3.5f)), half(sinf(3.5f)));
-
- VERIFY_IS_APPROX(numext::tan(half(0.0f)), half(tanf(0.0f)));
- // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI)), half(tanf(EIGEN_PI)));
- // VERIFY_IS_APPROX(numext::tan(half(EIGEN_PI/2)), half(tanf(EIGEN_PI/2)));
- //VERIFY_IS_APPROX(numext::tan(half(3*EIGEN_PI/2)), half(tanf(3*EIGEN_PI/2)));
- VERIFY_IS_APPROX(numext::tan(half(3.5f)), half(tanf(3.5f)));
-}
-
-void test_cxx11_float16()
-{
- CALL_SUBTEST(test_conversion());
- CALL_SUBTEST(test_numtraits());
- CALL_SUBTEST(test_arithmetic());
- CALL_SUBTEST(test_comparison());
- CALL_SUBTEST(test_basic_functions());
- CALL_SUBTEST(test_trigonometric_functions());
-}
diff --git a/unsupported/test/cxx11_tensor_dimension.cpp b/unsupported/test/cxx11_tensor_dimension.cpp
index 0bccc3396..16f168ed4 100644
--- a/unsupported/test/cxx11_tensor_dimension.cpp
+++ b/unsupported/test/cxx11_tensor_dimension.cpp
@@ -21,7 +21,7 @@ static void test_dynamic_size()
VERIFY_IS_EQUAL((int)Eigen::internal::array_get<0>(dimensions), 2);
VERIFY_IS_EQUAL((int)Eigen::internal::array_get<1>(dimensions), 3);
VERIFY_IS_EQUAL((int)Eigen::internal::array_get<2>(dimensions), 7);
- VERIFY_IS_EQUAL(dimensions.TotalSize(), 2*3*7);
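+ // (the int casts presumably silence signed/unsigned comparison warnings
+ // inside VERIFY_IS_EQUAL)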
+ VERIFY_IS_EQUAL((int)dimensions.TotalSize(), 2*3*7);
VERIFY_IS_EQUAL((int)dimensions[0], 2);
VERIFY_IS_EQUAL((int)dimensions[1], 3);
VERIFY_IS_EQUAL((int)dimensions[2], 7);
@@ -34,12 +34,12 @@ static void test_fixed_size()
VERIFY_IS_EQUAL((int)Eigen::internal::array_get<0>(dimensions), 2);
VERIFY_IS_EQUAL((int)Eigen::internal::array_get<1>(dimensions), 3);
VERIFY_IS_EQUAL((int)Eigen::internal::array_get<2>(dimensions), 7);
- VERIFY_IS_EQUAL(dimensions.TotalSize(), 2*3*7);
+ VERIFY_IS_EQUAL((int)dimensions.TotalSize(), 2*3*7);
}
static void test_match()
{
- Eigen::DSizes<int, 3> dyn(2,3,7);
+ Eigen::DSizes<unsigned int, 3> dyn((unsigned int)2,(unsigned int)3,(unsigned int)7);
Eigen::Sizes<2,3,7> stat;
VERIFY_IS_EQUAL(Eigen::dimensions_match(dyn, stat), true);
@@ -51,13 +51,13 @@ static void test_match()
static void test_rank_zero()
{
Eigen::Sizes<> scalar;
- VERIFY_IS_EQUAL(scalar.TotalSize(), 1);
- VERIFY_IS_EQUAL(scalar.rank(), 0);
- VERIFY_IS_EQUAL(internal::array_prod(scalar), 1);
+ VERIFY_IS_EQUAL((int)scalar.TotalSize(), 1);
+ VERIFY_IS_EQUAL((int)scalar.rank(), 0);
+ VERIFY_IS_EQUAL((int)internal::array_prod(scalar), 1);
Eigen::DSizes<ptrdiff_t, 0> dscalar;
- VERIFY_IS_EQUAL(dscalar.TotalSize(), 1);
- VERIFY_IS_EQUAL(dscalar.rank(), 0u);
+ VERIFY_IS_EQUAL((int)dscalar.TotalSize(), 1);
+ VERIFY_IS_EQUAL((int)dscalar.rank(), 0);
}
void test_cxx11_tensor_dimension()
diff --git a/unsupported/test/cxx11_tensor_morphing.cpp b/unsupported/test/cxx11_tensor_morphing.cpp
index c575d3fdc..f7de43110 100644
--- a/unsupported/test/cxx11_tensor_morphing.cpp
+++ b/unsupported/test/cxx11_tensor_morphing.cpp
@@ -13,6 +13,7 @@
using Eigen::Tensor;
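+// Turning these helpers into templates presumably ensures each one is only
+// instantiated by the subtest parts that actually call it.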
+template<typename>
static void test_simple_reshape()
{
Tensor<float, 5> tensor1(2,3,1,7,1);
@@ -40,7 +41,7 @@ static void test_simple_reshape()
}
}
-
+template<typename>
static void test_reshape_in_expr() {
MatrixXf m1(2,3*5*7*11);
MatrixXf m2(3*5*7*11,13);
@@ -65,7 +66,7 @@ static void test_reshape_in_expr() {
}
}
-
+template<typename>
static void test_reshape_as_lvalue()
{
Tensor<float, 3> tensor(2,3,7);
@@ -114,6 +115,7 @@ static void test_simple_slice()
}
}
+template<typename=void>
static void test_const_slice()
{
const float b[1] = {42};
@@ -459,25 +461,25 @@ static void test_composition()
void test_cxx11_tensor_morphing()
{
- CALL_SUBTEST(test_simple_reshape());
- CALL_SUBTEST(test_reshape_in_expr());
- CALL_SUBTEST(test_reshape_as_lvalue());
-
- CALL_SUBTEST(test_simple_slice<ColMajor>());
- CALL_SUBTEST(test_simple_slice<RowMajor>());
- CALL_SUBTEST(test_const_slice());
- CALL_SUBTEST(test_slice_in_expr<ColMajor>());
- CALL_SUBTEST(test_slice_in_expr<RowMajor>());
- CALL_SUBTEST(test_slice_as_lvalue<ColMajor>());
- CALL_SUBTEST(test_slice_as_lvalue<RowMajor>());
- CALL_SUBTEST(test_slice_raw_data<ColMajor>());
- CALL_SUBTEST(test_slice_raw_data<RowMajor>());
-
- CALL_SUBTEST(test_strided_slice_write<ColMajor>());
- CALL_SUBTEST(test_strided_slice<ColMajor>());
- CALL_SUBTEST(test_strided_slice_write<RowMajor>());
- CALL_SUBTEST(test_strided_slice<RowMajor>());
-
- CALL_SUBTEST(test_composition<ColMajor>());
- CALL_SUBTEST(test_composition<RowMajor>());
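+ // The numbered CALL_SUBTEST_N macros split the test into parts that the
+ // harness can compile and run independently (via EIGEN_TEST_PART_N).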
+ CALL_SUBTEST_1(test_simple_reshape<void>());
+ CALL_SUBTEST_1(test_reshape_in_expr<void>());
+ CALL_SUBTEST_1(test_reshape_as_lvalue<void>());
+
+ CALL_SUBTEST_1(test_simple_slice<ColMajor>());
+ CALL_SUBTEST_1(test_simple_slice<RowMajor>());
+ CALL_SUBTEST_1(test_const_slice());
+ CALL_SUBTEST_2(test_slice_in_expr<ColMajor>());
+ CALL_SUBTEST_3(test_slice_in_expr<RowMajor>());
+ CALL_SUBTEST_4(test_slice_as_lvalue<ColMajor>());
+ CALL_SUBTEST_4(test_slice_as_lvalue<RowMajor>());
+ CALL_SUBTEST_5(test_slice_raw_data<ColMajor>());
+ CALL_SUBTEST_5(test_slice_raw_data<RowMajor>());
+
+ CALL_SUBTEST_6(test_strided_slice_write<ColMajor>());
+ CALL_SUBTEST_6(test_strided_slice<ColMajor>());
+ CALL_SUBTEST_6(test_strided_slice_write<RowMajor>());
+ CALL_SUBTEST_6(test_strided_slice<RowMajor>());
+
+ CALL_SUBTEST_7(test_composition<ColMajor>());
+ CALL_SUBTEST_7(test_composition<RowMajor>());
}
diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
index 34e9f54a0..2f55f9361 100644
--- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu
+++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu
@@ -19,8 +19,47 @@
using Eigen::Tensor;
+template<typename>
+void test_cuda_numext() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = 101;
+
+ float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ bool* d_res_half = (bool*)gpu_device.allocate(num_elem * sizeof(bool));
+ bool* d_res_float = (bool*)gpu_device.allocate(num_elem * sizeof(bool));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
+ d_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_res_half(
+ d_res_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_res_float(
+ d_res_float, num_elem);
+
+ gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
+ gpu_res_float.device(gpu_device) = gpu_float.unaryExpr(Eigen::internal::scalar_isnan_op<float>());
+ gpu_res_half.device(gpu_device) = gpu_float.cast<Eigen::half>().unaryExpr(Eigen::internal::scalar_isnan_op<Eigen::half>());
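+ // The float path and the half path evaluate the same isnan predicate; the
+ // check below expects both to flag exactly the same elements.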
+
+ Tensor<bool, 1> half_prec(num_elem);
+ Tensor<bool, 1> full_prec(num_elem);
+ gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(bool));
+ gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(bool));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < num_elem; ++i) {
+ std::cout << "Checking numext " << i << std::endl;
+ VERIFY_IS_EQUAL(full_prec(i), half_prec(i));
+ }
+
+ gpu_device.deallocate(d_float);
+ gpu_device.deallocate(d_res_half);
+ gpu_device.deallocate(d_res_float);
+}
+
+
#ifdef EIGEN_HAS_CUDA_FP16
+template<typename>
void test_cuda_conversion() {
Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
@@ -55,7 +94,7 @@ void test_cuda_conversion() {
gpu_device.deallocate(d_conv);
}
-
+template<typename>
void test_cuda_unary() {
Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
@@ -92,7 +131,7 @@ void test_cuda_unary() {
gpu_device.deallocate(d_res_float);
}
-
+template<typename>
void test_cuda_elementwise() {
Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
@@ -134,6 +173,7 @@ void test_cuda_elementwise() {
gpu_device.deallocate(d_res_float);
}
+template<typename>
void test_cuda_trancendental() {
Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
@@ -186,7 +226,10 @@ void test_cuda_trancendental() {
}
for (int i = 0; i < num_elem; ++i) {
std::cout << "Checking elemwise log " << i << " input = " << input2(i) << " full = " << full_prec2(i) << " half = " << half_prec2(i) << std::endl;
- VERIFY_IS_APPROX(full_prec2(i), half_prec2(i));
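+ // VERIFY_IS_APPROX compares with relative precision, and log crosses zero
+ // at x = 1, so the half-precision error is relatively huge there; shifting
+ // both values by 0.1 turns this into a well-conditioned comparison.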
+ if(std::abs(input2(i)-1.f)<0.05f) // log lacks accuracy near 1
+ VERIFY_IS_APPROX(full_prec2(i)+Eigen::half(0.1f), half_prec2(i)+Eigen::half(0.1f));
+ else
+ VERIFY_IS_APPROX(full_prec2(i), half_prec2(i));
}
gpu_device.deallocate(d_float1);
gpu_device.deallocate(d_float2);
@@ -196,7 +239,7 @@ void test_cuda_trancendental() {
gpu_device.deallocate(d_res2_float);
}
-
+template<typename>
void test_cuda_contractions() {
Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
@@ -247,7 +290,7 @@ void test_cuda_contractions() {
gpu_device.deallocate(d_res_float);
}
-
+template<typename>
void test_cuda_reductions(int size1, int size2, int redux) {
std::cout << "Reducing " << size1 << " by " << size2
@@ -296,17 +339,19 @@ void test_cuda_reductions(int size1, int size2, int redux) {
gpu_device.deallocate(d_res_float);
}
+template<typename>
void test_cuda_reductions() {
- test_cuda_reductions(13, 13, 0);
- test_cuda_reductions(13, 13, 1);
+ test_cuda_reductions<void>(13, 13, 0);
+ test_cuda_reductions<void>(13, 13, 1);
- test_cuda_reductions(35, 36, 0);
- test_cuda_reductions(35, 36, 1);
+ test_cuda_reductions<void>(35, 36, 0);
+ test_cuda_reductions<void>(35, 36, 1);
- test_cuda_reductions(36, 35, 0);
- test_cuda_reductions(36, 35, 1);
+ test_cuda_reductions<void>(36, 35, 0);
+ test_cuda_reductions<void>(36, 35, 1);
}
+template<typename>
void test_cuda_full_reductions() {
Eigen::CudaStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
@@ -355,7 +400,7 @@ void test_cuda_full_reductions() {
gpu_device.deallocate(d_res_float);
}
-
+template<typename>
void test_cuda_forced_evals() {
Eigen::CudaStreamDevice stream;
@@ -408,15 +453,17 @@ void test_cuda_forced_evals() {
void test_cxx11_tensor_of_float16_cuda()
{
+ CALL_SUBTEST_1(test_cuda_numext<void>());
+
#ifdef EIGEN_HAS_CUDA_FP16
- CALL_SUBTEST_1(test_cuda_conversion());
- CALL_SUBTEST_1(test_cuda_unary());
- CALL_SUBTEST_1(test_cuda_elementwise());
- CALL_SUBTEST_1(test_cuda_trancendental());
- CALL_SUBTEST_2(test_cuda_contractions());
- CALL_SUBTEST_3(test_cuda_reductions());
- CALL_SUBTEST_4(test_cuda_full_reductions());
- CALL_SUBTEST_5(test_cuda_forced_evals());
+ CALL_SUBTEST_1(test_cuda_conversion<void>());
+ CALL_SUBTEST_1(test_cuda_unary<void>());
+ CALL_SUBTEST_1(test_cuda_elementwise<void>());
+ CALL_SUBTEST_1(test_cuda_trancendental<void>());
+ CALL_SUBTEST_2(test_cuda_contractions<void>());
+ CALL_SUBTEST_3(test_cuda_reductions<void>());
+ CALL_SUBTEST_4(test_cuda_full_reductions<void>());
+ CALL_SUBTEST_5(test_cuda_forced_evals<void>());
#else
std::cout << "Half floats are not supported by this version of cuda: skipping the test" << std::endl;
#endif
diff --git a/unsupported/test/cxx11_tensor_reduction.cpp b/unsupported/test/cxx11_tensor_reduction.cpp
index ca483257b..1490ec3da 100644
--- a/unsupported/test/cxx11_tensor_reduction.cpp
+++ b/unsupported/test/cxx11_tensor_reduction.cpp
@@ -239,6 +239,33 @@ static void test_simple_reductions() {
}
}
+
+template <int DataLayout>
+static void test_reductions_in_expr() {
+ Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
+ tensor.setRandom();
+ array<ptrdiff_t, 2> reduction_axis2;
+ reduction_axis2[0] = 1;
+ reduction_axis2[1] = 3;
+
+ Tensor<float, 2, DataLayout> result(2, 5);
+ result = result.constant(1.0f) - tensor.sum(reduction_axis2);
+ VERIFY_IS_EQUAL(result.dimension(0), 2);
+ VERIFY_IS_EQUAL(result.dimension(1), 5);
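+ // Re-compute the reduction by hand: axes 1 and 3 (sizes 3 and 7) are
+ // summed away, so each result(i, j) should equal 1 - sum over (k, l).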
+ for (int i = 0; i < 2; ++i) {
+ for (int j = 0; j < 5; ++j) {
+ float sum = 0.0f;
+ for (int k = 0; k < 3; ++k) {
+ for (int l = 0; l < 7; ++l) {
+ sum += tensor(i, k, j, l);
+ }
+ }
+ VERIFY_IS_APPROX(result(i, j), 1.0f - sum);
+ }
+ }
+}
+
+
template <int DataLayout>
static void test_full_reductions() {
Tensor<float, 2, DataLayout> tensor(2, 3);
@@ -462,6 +489,8 @@ void test_cxx11_tensor_reduction() {
CALL_SUBTEST(test_trivial_reductions<RowMajor>());
CALL_SUBTEST(test_simple_reductions<ColMajor>());
CALL_SUBTEST(test_simple_reductions<RowMajor>());
+ CALL_SUBTEST(test_reductions_in_expr<ColMajor>());
+ CALL_SUBTEST(test_reductions_in_expr<RowMajor>());
CALL_SUBTEST(test_full_reductions<ColMajor>());
CALL_SUBTEST(test_full_reductions<RowMajor>());
CALL_SUBTEST(test_user_defined_reductions<ColMajor>());
diff --git a/unsupported/test/mpreal_support.cpp b/unsupported/test/mpreal_support.cpp
index 1aa9e786a..ffa5691eb 100644
--- a/unsupported/test/mpreal_support.cpp
+++ b/unsupported/test/mpreal_support.cpp
@@ -17,6 +17,7 @@ void test_mpreal_support()
std::cerr << "dummy_precision = " << NumTraits<mpreal>::dummy_precision() << "\n";
std::cerr << "highest = " << NumTraits<mpreal>::highest() << "\n";
std::cerr << "lowest = " << NumTraits<mpreal>::lowest() << "\n";
+ std::cerr << "digits10 = " << NumTraits<mpreal>::digits10() << "\n";
for(int i = 0; i < g_repeat; i++) {
int s = Eigen::internal::random<int>(1,100);
diff --git a/unsupported/test/special_functions.cpp b/unsupported/test/special_functions.cpp
new file mode 100644
index 000000000..057fb3e92
--- /dev/null
+++ b/unsupported/test/special_functions.cpp
@@ -0,0 +1,345 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include "main.h"
+#include "../Eigen/SpecialFunctions"
+
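+// Element-wise comparison helper: approximate equality for finite reference
+// values, NaN must match NaN, and exact equality otherwise (e.g. infinities).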
+template<typename X, typename Y>
+void verify_component_wise(const X& x, const Y& y)
+{
+ for(Index i=0; i<x.size(); ++i)
+ {
+ if((numext::isfinite)(y(i)))
+ VERIFY_IS_APPROX( x(i), y(i) );
+ else if((numext::isnan)(y(i)))
+ VERIFY((numext::isnan)(x(i)));
+ else
+ VERIFY_IS_EQUAL( x(i), y(i) );
+ }
+}
+
+template<typename ArrayType> void array_special_functions()
+{
+ using std::abs;
+ using std::sqrt;
+ typedef typename ArrayType::Scalar Scalar;
+ typedef typename NumTraits<Scalar>::Real RealScalar;
+
+ Scalar plusinf = std::numeric_limits<Scalar>::infinity();
+ Scalar nan = std::numeric_limits<Scalar>::quiet_NaN();
+
+ Index rows = internal::random<Index>(1,30);
+ Index cols = 1;
+
+ // API
+ {
+ ArrayType m1 = ArrayType::Random(rows,cols);
+#if EIGEN_HAS_C99_MATH
+ VERIFY_IS_APPROX(m1.lgamma(), lgamma(m1));
+ VERIFY_IS_APPROX(m1.digamma(), digamma(m1));
+ VERIFY_IS_APPROX(m1.erf(), erf(m1));
+ VERIFY_IS_APPROX(m1.erfc(), erfc(m1));
+#endif // EIGEN_HAS_C99_MATH
+ }
+
+
+#if EIGEN_HAS_C99_MATH
+ // check special functions (comparing against numpy implementation)
+ if (!NumTraits<Scalar>::IsComplex)
+ {
+
+ {
+ ArrayType m1 = ArrayType::Random(rows,cols);
+ ArrayType m2 = ArrayType::Random(rows,cols);
+
+ // Test various properties of igamma & igammac. These are normalized
+ // gamma integrals where
+ // igammac(a, x) = Gamma(a, x) / Gamma(a)
+ // igamma(a, x) = gamma(a, x) / Gamma(a)
+ // where Gamma and gamma are considered the standard unnormalized
+ // upper and lower incomplete gamma functions, respectively.
+ ArrayType a = m1.abs() + 2;
+ ArrayType x = m2.abs() + 2;
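+ // a and x are kept >= 2 (hence a-1 >= 1) so that lgamma, pow and the
+ // recurrences checked below stay away from the poles of the gamma function.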
+ ArrayType zero = ArrayType::Zero(rows, cols);
+ ArrayType one = ArrayType::Constant(rows, cols, Scalar(1.0));
+ ArrayType a_m1 = a - one;
+ ArrayType Gamma_a_x = Eigen::igammac(a, x) * a.lgamma().exp();
+ ArrayType Gamma_a_m1_x = Eigen::igammac(a_m1, x) * a_m1.lgamma().exp();
+ ArrayType gamma_a_x = Eigen::igamma(a, x) * a.lgamma().exp();
+ ArrayType gamma_a_m1_x = Eigen::igamma(a_m1, x) * a_m1.lgamma().exp();
+
+ // Gamma(a, 0) == Gamma(a)
+ VERIFY_IS_APPROX(Eigen::igammac(a, zero), one);
+
+ // Gamma(a, x) + gamma(a, x) == Gamma(a)
+ VERIFY_IS_APPROX(Gamma_a_x + gamma_a_x, a.lgamma().exp());
+
+ // Gamma(a, x) == (a - 1) * Gamma(a-1, x) + x^(a-1) * exp(-x)
+ VERIFY_IS_APPROX(Gamma_a_x, (a - 1) * Gamma_a_m1_x + x.pow(a-1) * (-x).exp());
+
+ // gamma(a, x) == (a - 1) * gamma(a-1, x) - x^(a-1) * exp(-x)
+ VERIFY_IS_APPROX(gamma_a_x, (a - 1) * gamma_a_m1_x - x.pow(a-1) * (-x).exp());
+ }
+
+ {
+ // Check exact values of igamma and igammac against a third party calculation.
+ Scalar a_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+ Scalar x_s[] = {Scalar(0), Scalar(1), Scalar(1.5), Scalar(4), Scalar(0.0001), Scalar(1000.5)};
+
+ // location i*6+j corresponds to a_s[i], x_s[j].
+ Scalar igamma_s[][6] = {{0.0, nan, nan, nan, nan, nan},
+ {0.0, 0.6321205588285578, 0.7768698398515702,
+ 0.9816843611112658, 9.999500016666262e-05, 1.0},
+ {0.0, 0.4275932955291202, 0.608374823728911,
+ 0.9539882943107686, 7.522076445089201e-07, 1.0},
+ {0.0, 0.01898815687615381, 0.06564245437845008,
+ 0.5665298796332909, 4.166333347221828e-18, 1.0},
+ {0.0, 0.9999780593618628, 0.9999899967080838,
+ 0.9999996219837988, 0.9991370418689945, 1.0},
+ {0.0, 0.0, 0.0, 0.0, 0.0, 0.5042041932513908}};
+ Scalar igammac_s[][6] = {{nan, nan, nan, nan, nan, nan},
+ {1.0, 0.36787944117144233, 0.22313016014842982,
+ 0.018315638888734182, 0.9999000049998333, 0.0},
+ {1.0, 0.5724067044708798, 0.3916251762710878,
+ 0.04601170568923136, 0.9999992477923555, 0.0},
+ {1.0, 0.9810118431238462, 0.9343575456215499,
+ 0.4334701203667089, 1.0, 0.0},
+ {1.0, 2.1940638138146658e-05, 1.0003291916285e-05,
+ 3.7801620118431334e-07, 0.0008629581310054535,
+ 0.0},
+ {1.0, 1.0, 1.0, 1.0, 1.0, 0.49579580674813944}};
+ for (int i = 0; i < 6; ++i) {
+ for (int j = 0; j < 6; ++j) {
+ if ((std::isnan)(igamma_s[i][j])) {
+ VERIFY((std::isnan)(numext::igamma(a_s[i], x_s[j])));
+ } else {
+ VERIFY_IS_APPROX(numext::igamma(a_s[i], x_s[j]), igamma_s[i][j]);
+ }
+
+ if ((std::isnan)(igammac_s[i][j])) {
+ VERIFY((std::isnan)(numext::igammac(a_s[i], x_s[j])));
+ } else {
+ VERIFY_IS_APPROX(numext::igammac(a_s[i], x_s[j]), igammac_s[i][j]);
+ }
+ }
+ }
+ }
+ }
+#endif // EIGEN_HAS_C99_MATH
+
+ // Check the zeta function against scipy.special.zeta
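+ // (presumably via scipy.special.zeta(x, q); the generating script is not
+ // included, so the exact invocation is an assumption)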
+ {
+ ArrayType x(7), q(7), res(7), ref(7);
+ x << 1.5, 4, 10.5, 10000.5, 3, 1, 0.9;
+ q << 2, 1.5, 3, 1.0001, -2.5, 1.2345, 1.2345;
+ ref << 1.61237534869, 0.234848505667, 1.03086757337e-5, 0.367879440865, 0.054102025820864097, plusinf, nan;
+ CALL_SUBTEST( verify_component_wise(ref, ref); );
+ CALL_SUBTEST( res = x.zeta(q); verify_component_wise(res, ref); );
+ CALL_SUBTEST( res = zeta(x,q); verify_component_wise(res, ref); );
+ }
+
+ // digamma
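+ // x = 0 and x = -1 are poles of digamma; the reference encodes them as
+ // +inf, which is what the implementation is expected to return.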
+ {
+ ArrayType x(7), res(7), ref(7);
+ x << 1, 1.5, 4, -10.5, 10000.5, 0, -1;
+ ref << -0.5772156649015329, 0.03648997397857645, 1.2561176684318, 2.398239129535781, 9.210340372392849, plusinf, plusinf;
+ CALL_SUBTEST( verify_component_wise(ref, ref); );
+
+ CALL_SUBTEST( res = x.digamma(); verify_component_wise(res, ref); );
+ CALL_SUBTEST( res = digamma(x); verify_component_wise(res, ref); );
+ }
+
+
+#if EIGEN_HAS_C99_MATH
+ {
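+ // polygamma(n, x) is the n-th derivative of the digamma function; the
+ // reference values below are presumably from scipy.special.polygamma.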
+ ArrayType n(11), x(11), res(11), ref(11);
+ n << 1, 1, 1, 1.5, 17, 31, 28, 8, 42, 147, 170;
+ x << 2, 3, 25.5, 1.5, 4.7, 11.8, 17.7, 30.2, 15.8, 54.1, 64;
+ ref << 0.644934066848, 0.394934066848, 0.0399946696496, nan, 293.334565435, 0.445487887616, -2.47810300902e-07, -8.29668781082e-09, -0.434562276666, 0.567742190178, -0.0108615497927;
+ CALL_SUBTEST( verify_component_wise(ref, ref); );
+
+ if(sizeof(RealScalar)>=8) { // double
+ // Reason for commented line: http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1232
+ // CALL_SUBTEST( res = x.polygamma(n); verify_component_wise(res, ref); );
+ CALL_SUBTEST( res = polygamma(n,x); verify_component_wise(res, ref); );
+ }
+ else {
+ // CALL_SUBTEST( res = x.polygamma(n); verify_component_wise(res.head(8), ref.head(8)); );
+ CALL_SUBTEST( res = polygamma(n,x); verify_component_wise(res.head(8), ref.head(8)); );
+ }
+ }
+#endif
+
+#if EIGEN_HAS_C99_MATH
+ {
+ // Inputs and ground truth generated with scipy via:
+ // a = np.logspace(-3, 3, 5) - 1e-3
+ // b = np.logspace(-3, 3, 5) - 1e-3
+ // x = np.linspace(-0.1, 1.1, 5)
+ // (full_a, full_b, full_x) = np.vectorize(lambda a, b, x: (a, b, x))(*np.ix_(a, b, x))
+ // full_a = full_a.flatten().tolist() # same for full_b, full_x
+ // v = scipy.special.betainc(full_a, full_b, full_x).flatten().tolist()
+ //
+ // Note in Eigen, we call betainc with arguments in the order (x, a, b).
+ ArrayType a(125);
+ ArrayType b(125);
+ ArrayType x(125);
+ ArrayType v(125);
+ ArrayType res(125);
+
+ a << 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 999.999;
+
+ b << 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379, 0.999,
+ 0.999, 0.999, 0.999, 0.999, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379, 999.999,
+ 999.999, 999.999, 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.999, 0.999, 0.999, 0.999,
+ 0.999, 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999,
+ 999.999, 999.999, 0.0, 0.0, 0.0, 0.0, 0.0, 0.03062277660168379,
+ 0.03062277660168379, 0.03062277660168379, 0.03062277660168379,
+ 0.03062277660168379, 0.999, 0.999, 0.999, 0.999, 0.999,
+ 31.62177660168379, 31.62177660168379, 31.62177660168379,
+ 31.62177660168379, 31.62177660168379, 999.999, 999.999, 999.999,
+ 999.999, 999.999;
+
+ x << -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5,
+ 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2,
+ 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1,
+ 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1,
+ -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8,
+ 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5,
+ 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2,
+ 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1,
+ 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5, 0.8, 1.1, -0.1, 0.2, 0.5,
+ 0.8, 1.1;
+
+ v << nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
+ nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
+ nan, nan, nan, 0.47972119876364683, 0.5, 0.5202788012363533, nan, nan,
+ 0.9518683957740043, 0.9789663010413743, 0.9931729188073435, nan, nan,
+ 0.999995949033062, 0.9999999999993698, 0.9999999999999999, nan, nan,
+ 0.9999999999999999, 0.9999999999999999, 0.9999999999999999, nan, nan,
+ nan, nan, nan, nan, nan, 0.006827081192655869, 0.0210336989586256,
+ 0.04813160422599567, nan, nan, 0.20014344256217678, 0.5000000000000001,
+ 0.7998565574378232, nan, nan, 0.9991401428435834, 0.999999999698403,
+ 0.9999999999999999, nan, nan, 0.9999999999999999, 0.9999999999999999,
+ 0.9999999999999999, nan, nan, nan, nan, nan, nan, nan,
+ 1.0646600232370887e-25, 6.301722877826246e-13, 4.050966937974938e-06,
+ nan, nan, 7.864342668429763e-23, 3.015969667594166e-10,
+ 0.0008598571564165444, nan, nan, 6.031987710123844e-08,
+ 0.5000000000000007, 0.9999999396801229, nan, nan, 0.9999999999999999,
+ 0.9999999999999999, 0.9999999999999999, nan, nan, nan, nan, nan, nan,
+ nan, 0.0, 7.029920380986636e-306, 2.2450728208591345e-101, nan, nan,
+ 0.0, 9.275871147869727e-302, 1.2232913026152827e-97, nan, nan, 0.0,
+ 3.0891393081932924e-252, 2.9303043666183996e-60, nan, nan,
+ 2.248913486879199e-196, 0.5000000000004947, 0.9999999999999999, nan;
+
+ CALL_SUBTEST(res = betainc(a, b, x);
+ verify_component_wise(res, v););
+ }
+
+ // Test various properties of betainc
+ {
+ ArrayType m1 = ArrayType::Random(32);
+ ArrayType m2 = ArrayType::Random(32);
+ ArrayType m3 = ArrayType::Random(32);
+ ArrayType one = ArrayType::Constant(32, Scalar(1.0));
+ const Scalar eps = std::numeric_limits<Scalar>::epsilon();
+ ArrayType a = (m1 * 4.0).exp();
+ ArrayType b = (m2 * 4.0).exp();
+ ArrayType x = m3.abs();
+
+ // betainc(a, 1, x) == x**a
+ CALL_SUBTEST(
+ ArrayType test = betainc(a, one, x);
+ ArrayType expected = x.pow(a);
+ verify_component_wise(test, expected););
+
+ // betainc(1, b, x) == 1 - (1 - x)**b
+ CALL_SUBTEST(
+ ArrayType test = betainc(one, b, x);
+ ArrayType expected = one - (one - x).pow(b);
+ verify_component_wise(test, expected););
+
+ // betainc(a, b, x) == 1 - betainc(b, a, 1-x)
+ CALL_SUBTEST(
+ ArrayType test = betainc(a, b, x) + betainc(b, a, one - x);
+ ArrayType expected = one;
+ verify_component_wise(test, expected););
+
+ // betainc(a+1, b, x) = betainc(a, b, x) - x**a * (1 - x)**b / (a * beta(a, b))
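+ // beta(a, b) is evaluated below as exp(lgamma(a) + lgamma(b) - lgamma(a+b)),
+ // which avoids overflowing the gamma function for large arguments.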
+ CALL_SUBTEST(
+ ArrayType num = x.pow(a) * (one - x).pow(b);
+ ArrayType denom = a * (a.lgamma() + b.lgamma() - (a + b).lgamma()).exp();
+ // Add eps to rhs and lhs so that the component-wise test doesn't
+ // produce NaNs when both outputs are zero.
+ ArrayType expected = betainc(a, b, x) - num / denom + eps;
+ ArrayType test = betainc(a + one, b, x) + eps;
+ if (sizeof(Scalar) >= 8) { // double
+ verify_component_wise(test, expected);
+ } else {
+ // Reason for limited test: http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1232
+ verify_component_wise(test.head(8), expected.head(8));
+ });
+
+ // betainc(a, b+1, x) = betainc(a, b, x) + x**a * (1 - x)**b / (b * beta(a, b))
+ CALL_SUBTEST(
+ // Add eps to rhs and lhs so that the component-wise test doesn't
+ // produce NaNs when both outputs are zero.
+ ArrayType num = x.pow(a) * (one - x).pow(b);
+ ArrayType denom = b * (a.lgamma() + b.lgamma() - (a + b).lgamma()).exp();
+ ArrayType expected = betainc(a, b, x) + num / denom + eps;
+ ArrayType test = betainc(a, b + one, x) + eps;
+ verify_component_wise(test, expected););
+ }
+#endif
+}
+
+void test_special_functions()
+{
+ CALL_SUBTEST_1(array_special_functions<ArrayXf>());
+ CALL_SUBTEST_2(array_special_functions<ArrayXd>());
+}