about | summary | refs | log | tree | commit | diff | homepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h')
-rw-r--r-- unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h 207
1 files changed, 137 insertions, 70 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
index 34ba4e392..b7c13f67f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
@@ -25,7 +25,34 @@ struct scalar_mod_op {
};
template <typename Scalar>
struct functor_traits<scalar_mod_op<Scalar> >
-{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = false }; };
+{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; };  // cost now read from NumTraits<Scalar>::Div<false> instead of 2 * MulCost; packet evaluation stays disabled
+
+
+/** \internal
+ * \brief Binary functor returning the remainder a % b of its two scalar operands.
+ *
+ * The functor holds no state: operator() receives both operands by const
+ * reference and applies operator% to them. No check on b is performed here.
+ */
+template <typename Scalar>
+struct scalar_mod2_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_mod2_op);
+ EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a, const Scalar& b) const { return a % b; }
+};
+template <typename Scalar>  // Traits specialization for scalar_mod2_op.
+struct functor_traits<scalar_mod2_op<Scalar> >
+{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; };  // Cost comes from NumTraits<Scalar>::Div<false> (presumably the non-vectorized division cost - confirm); no packet path
+
+template <typename Scalar>  // Binary functor that forwards its two operands to numext::fmod.
+struct scalar_fmod_op {
+ EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod_op);
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar
+ operator()(const Scalar& a, const Scalar& b) const {
+ return numext::fmod(a, b);  // result is whatever numext::fmod yields for (a, b); no extra handling here
+ }
+};
+template <typename Scalar>  // Traits specialization for scalar_fmod_op.
+struct functor_traits<scalar_fmod_op<Scalar> > {
+ enum { Cost = 13, // Reciprocal throughput of FPREM on Haswell.
+ PacketAccess = false };  // a fixed cost constant is used for every Scalar; packet evaluation is disabled
+};
/** \internal
@@ -72,11 +99,12 @@ template <typename T> struct SumReducer
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
- return static_cast<T>(0);
+ internal::scalar_cast_op<int, T> conv;  // int -> T conversion functor, used in place of the former static_cast<T>
+ return conv(0);  // returns the value 0 converted to T
}
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
- return pset1<Packet>(0);
+ return pset1<Packet>(initialize());  // packet is built by pset1 from initialize(), so scalar and packet start values share one definition
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
return accum;
@@ -93,7 +121,7 @@ template <typename T> struct SumReducer
template <typename T> struct MeanReducer
{
- static const bool PacketAccess = true;
+ static const bool PacketAccess = !NumTraits<T>::IsInteger;
static const bool IsStateful = true;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -110,11 +138,12 @@ template <typename T> struct MeanReducer
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
- return static_cast<T>(0);
+ internal::scalar_cast_op<int, T> conv;  // int -> T conversion functor, used in place of the former static_cast<T>
+ return conv(0);  // returns the value 0 converted to T
}
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
- return pset1<Packet>(0);
+ return pset1<Packet>(initialize());  // packet is built by pset1 from initialize() rather than from a literal 0
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
return accum / scalarCount_;
@@ -147,11 +176,11 @@ template <typename T> struct MaxReducer
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
- return -(std::numeric_limits<T>::max)();
+ return Eigen::NumTraits<T>::lowest();  // replaces -(numeric_limits<T>::max)(); presumably the smallest finite value of T - confirm against NumTraits
}
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
- return pset1<Packet>(-(std::numeric_limits<T>::max)());
+ return pset1<Packet>(initialize());  // reuses the scalar start value instead of repeating the limit expression
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
return accum;
@@ -180,11 +209,11 @@ template <typename T> struct MinReducer
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
- return (std::numeric_limits<T>::max)();
+ return Eigen::NumTraits<T>::highest();  // replaces (numeric_limits<T>::max)(); presumably the largest finite value of T - confirm against NumTraits
}
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
- return pset1<Packet>((std::numeric_limits<T>::max)());
+ return pset1<Packet>(initialize());  // reuses the scalar start value instead of repeating the limit expression
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
return accum;
@@ -214,11 +243,12 @@ template <typename T> struct ProdReducer
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
- return static_cast<T>(1);
+ internal::scalar_cast_op<int, T> conv;  // int -> T conversion functor, used in place of the former static_cast<T>
+ return conv(1);  // returns the value 1 converted to T
}
template <typename Packet>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
- return pset1<Packet>(1);
+ return pset1<Packet>(initialize());  // packet is built by pset1 from initialize() rather than from a literal 1
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
return accum;
@@ -237,6 +267,8 @@ template <typename T> struct ProdReducer
struct AndReducer
{
static const bool PacketAccess = false;
+ static const bool IsStateful = false;
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const {
*accum = *accum && t;  // folds t into the running result with logical AND, writing back through accum
}
@@ -250,6 +282,8 @@ struct AndReducer
struct OrReducer {
static const bool PacketAccess = false;
+ static const bool IsStateful = false;
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const {
*accum = *accum || t;  // folds t into the running result with logical OR, writing back through accum
}
@@ -335,50 +369,54 @@ template <typename T> class UniformRandomGenerator {
}
template<typename Index>
- T operator()(Index, Index = 0) const {
+ T operator()(Index) const {  // the index argument is unnamed and unused; the former defaulted second index was dropped
return random<T>();
}
- template<typename Index>
- typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const {
- const int packetSize = internal::packet_traits<T>::size;
+ template<typename Index, typename PacketType>
+ PacketType packetOp(Index) const {  // the packet type is now a template parameter instead of packet_traits<T>::type
+ const int packetSize = internal::unpacket_traits<PacketType>::size;  // number of scalars to generate, taken from the requested packet type
EIGEN_ALIGN_MAX T values[packetSize];
for (int i = 0; i < packetSize; ++i) {
values[i] = random<T>();
}
- return internal::pload<typename internal::packet_traits<T>::type>(values);
+ return internal::pload<PacketType>(values);  // the filled buffer is loaded as one PacketType
}
private:
bool m_deterministic;
};
-#if __cplusplus > 199711
+#if __cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900
template <> class UniformRandomGenerator<float> {
public:
static const bool PacketAccess = true;
- UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
+ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_generator(new std::mt19937()) {  // the engine is now heap-allocated and held through a raw pointer
if (!deterministic) {
- m_generator.seed(get_random_seed());
+ m_generator->seed(get_random_seed());  // reseeded only when non-deterministic; otherwise the default-constructed engine state is kept
}
}
UniformRandomGenerator(const UniformRandomGenerator<float>& other) {
- m_generator.seed(other(0, 0) * UINT_MAX);
+ m_generator = new std::mt19937();  // the copy gets its own engine instead of sharing other's pointer
+ m_generator->seed(other(0) * UINT_MAX);  // seed is one sample drawn from other (which advances other's engine) scaled by UINT_MAX
m_deterministic = other.m_deterministic;
}
+ ~UniformRandomGenerator() {
+ delete m_generator;  // releases the engine allocated in the constructors. NOTE(review): no copy-assignment operator is visible in this hunk; a compiler-generated one would copy the raw pointer - confirm it is disabled elsewhere
+ }
template<typename Index>
- float operator()(Index, Index = 0) const {
- return m_distribution(m_generator);
+ float operator()(Index) const {  // the index argument is unused; the former defaulted second index was dropped
+ return m_distribution(*m_generator);  // draws one float from m_distribution using the pointed-to engine
}
- template<typename Index>
- typename internal::packet_traits<float>::type packetOp(Index i, Index j = 0) const {
- const int packetSize = internal::packet_traits<float>::size;
+ template<typename Index, typename PacketType>
+ PacketType packetOp(Index i) const {  // the packet type is now a template parameter instead of packet_traits<float>::type
+ const int packetSize = internal::unpacket_traits<PacketType>::size;  // number of floats to generate, taken from the requested packet type
EIGEN_ALIGN_MAX float values[packetSize];
for (int k = 0; k < packetSize; ++k) {
- values[k] = this->operator()(i, j);
+ values[k] = this->operator()(i);  // one scalar draw per slot; i is only forwarded and is not used by operator()
}
- return internal::pload<typename internal::packet_traits<float>::type>(values);
+ return internal::pload<PacketType>(values);  // the filled buffer is loaded as one PacketType
}
private:
@@ -386,7 +424,7 @@ template <> class UniformRandomGenerator<float> {
// Make sure m_deterministic comes first to match the layout of the cpu
// version of the code.
bool m_deterministic;
- mutable std::mt19937 m_generator;
+ std::mt19937* m_generator;
mutable std::uniform_real_distribution<float> m_distribution;
};
@@ -394,28 +432,32 @@ template <> class UniformRandomGenerator<double> {
public:
static const bool PacketAccess = true;
- UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {
+ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_generator(new std::mt19937()) {  // the engine is now heap-allocated and held through a raw pointer
if (!deterministic) {
- m_generator.seed(get_random_seed());
+ m_generator->seed(get_random_seed());  // reseeded only when non-deterministic; otherwise the default-constructed engine state is kept
}
}
UniformRandomGenerator(const UniformRandomGenerator<double>& other) {
- m_generator.seed(other(0, 0) * UINT_MAX);
+ m_generator = new std::mt19937();  // the copy gets its own engine instead of sharing other's pointer
+ m_generator->seed(other(0) * UINT_MAX);  // seed is one sample drawn from other (which advances other's engine) scaled by UINT_MAX
m_deterministic = other.m_deterministic;
}
+ ~UniformRandomGenerator() {
+ delete m_generator;  // releases the engine allocated in the constructors. NOTE(review): no copy-assignment operator is visible in this hunk; a compiler-generated one would copy the raw pointer - confirm it is disabled elsewhere
+ }
template<typename Index>
- double operator()(Index, Index = 0) const {
- return m_distribution(m_generator);
+ double operator()(Index) const {  // the index argument is unused; the former defaulted second index was dropped
+ return m_distribution(*m_generator);  // draws one double from m_distribution using the pointed-to engine
}
- template<typename Index>
- typename internal::packet_traits<double>::type packetOp(Index i, Index j = 0) const {
- const int packetSize = internal::packet_traits<double>::size;
+ template<typename Index, typename PacketType>
+ PacketType packetOp(Index i) const {  // the packet type is now a template parameter instead of packet_traits<double>::type
+ const int packetSize = internal::unpacket_traits<PacketType>::size;  // number of doubles to generate, taken from the requested packet type
EIGEN_ALIGN_MAX double values[packetSize];
for (int k = 0; k < packetSize; ++k) {
- values[k] = this->operator()(i, j);
+ values[k] = this->operator()(i);  // one scalar draw per slot; i is only forwarded and is not used by operator()
}
- return internal::pload<typename internal::packet_traits<double>::type>(values);
+ return internal::pload<PacketType>(values);  // the filled buffer is loaded as one PacketType
}
private:
@@ -423,7 +465,7 @@ template <> class UniformRandomGenerator<double> {
// Make sure m_deterministic comes first to match the layout of the cpu
// version of the code.
bool m_deterministic;
- mutable std::mt19937 m_generator;
+ std::mt19937* m_generator;
mutable std::uniform_real_distribution<double> m_distribution;
};
#endif
@@ -451,11 +493,12 @@ template <> class UniformRandomGenerator<float> {
}
template<typename Index>
- __device__ float operator()(Index, Index = 0) const {
+ __device__ float operator()(Index) const {  // the index argument is unused; the value comes from curand_uniform on m_state
return curand_uniform(&m_state);
}
- template<typename Index>
- __device__ float4 packetOp(Index, Index = 0) const {
+ template<typename Index, typename PacketType>
+ __device__ float4 packetOp(Index) const {  // the return type stays float4 even though PacketType is a template parameter
+ EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);  // compile-time check that the caller asked for float4
return curand_uniform4(&m_state);
}
@@ -480,11 +523,12 @@ template <> class UniformRandomGenerator<double> {
curand_init(seed, tid, 0, &m_state);
}
template<typename Index>
- __device__ double operator()(Index, Index = 0) const {
+ __device__ double operator()(Index) const {  // the index argument is unused; the value comes from curand_uniform_double on m_state
return curand_uniform_double(&m_state);
}
- template<typename Index>
- __device__ double2 packetOp(Index, Index = 0) const {
+ template<typename Index, typename PacketType>
+ __device__ double2 packetOp(Index) const {  // the return type stays double2 even though PacketType is a template parameter
+ EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);  // compile-time check that the caller asked for double2
return curand_uniform2_double(&m_state);
}
@@ -509,7 +553,7 @@ template <> class UniformRandomGenerator<std::complex<float> > {
curand_init(seed, tid, 0, &m_state);
}
template<typename Index>
- __device__ std::complex<float> operator()(Index, Index = 0) const {
+ __device__ std::complex<float> operator()(Index) const {  // the index argument is unused; only the x and y fields of the float4 drawn below are used
float4 vals = curand_uniform4(&m_state);
return std::complex<float>(vals.x, vals.y);
}
@@ -535,7 +579,7 @@ template <> class UniformRandomGenerator<std::complex<double> > {
curand_init(seed, tid, 0, &m_state);
}
template<typename Index>
- __device__ std::complex<double> operator()(Index, Index = 0) const {
+ __device__ std::complex<double> operator()(Index) const {  // the index argument is unused; real and imaginary parts are the x and y fields of the double2 drawn below
double2 vals = curand_uniform2_double(&m_state);
return std::complex<double>(vals.x, vals.y);
}
@@ -547,41 +591,54 @@ template <> class UniformRandomGenerator<std::complex<double> > {
#endif
+template <typename Scalar>  // Traits specialization for UniformRandomGenerator.
+struct functor_traits<UniformRandomGenerator<Scalar> > {
+ enum {
+ PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess  // forwards the generator's own static PacketAccess flag; no Cost entry is defined here
+ };
+};
+
-#if (!defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)) && __cplusplus > 199711
+
+#if (!defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)) && (__cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900)
// We're not compiling a cuda kernel
template <typename T> class NormalRandomGenerator {
public:
static const bool PacketAccess = true;
- NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_distribution(0, 1) {
+ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_distribution(0, 1), m_generator(new std::mt19937()) {  // distribution built with arguments (0, 1); the engine is heap-allocated and held through a raw pointer
if (!deterministic) {
- m_generator.seed(get_random_seed());
+ m_generator->seed(get_random_seed());  // reseeded only when non-deterministic; otherwise the default-constructed engine state is kept
}
}
NormalRandomGenerator(const NormalRandomGenerator& other)
- : m_deterministic(other.m_deterministic), m_distribution(other.m_distribution) {
- m_generator.seed(other(0, 0) * UINT_MAX);
+ : m_deterministic(other.m_deterministic), m_distribution(other.m_distribution), m_generator(new std::mt19937()) {  // the copy gets its own engine instead of sharing other's pointer
+ m_generator->seed(other(0) * UINT_MAX);  // seed is one sample drawn from other scaled by UINT_MAX. NOTE(review): a normal sample can be negative, and converting a negative floating value to the unsigned seed type is not well defined - confirm
}
-
- template<typename Index>
- T operator()(Index, Index = 0) const {
- return m_distribution(m_generator);
+ ~NormalRandomGenerator() {
+ delete m_generator;  // releases the engine allocated in the constructors
}
template<typename Index>
- typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const {
- const int packetSize = internal::packet_traits<T>::size;
+ T operator()(Index) const {  // the index argument is unused
+ return m_distribution(*m_generator);  // draws one sample from m_distribution using the pointed-to engine
+ }
+ template<typename Index, typename PacketType>
+ PacketType packetOp(Index) const {  // the packet type is now a template parameter instead of packet_traits<T>::type
+ const int packetSize = internal::unpacket_traits<PacketType>::size;  // number of scalars to generate, taken from the requested packet type
EIGEN_ALIGN_MAX T values[packetSize];
for (int i = 0; i < packetSize; ++i) {
- values[i] = m_distribution(m_generator);
+ values[i] = m_distribution(*m_generator);  // one draw per slot, directly from the distribution
}
- return internal::pload<typename internal::packet_traits<T>::type>(values);
+ return internal::pload<PacketType>(values);  // the filled buffer is loaded as one PacketType
}
private:
+ // Copy assignment is declared private and given no body here, so the
+ // owned generator pointer cannot be duplicated through assignment.
+ NormalRandomGenerator& operator = (const NormalRandomGenerator&);
+
bool m_deterministic;
mutable std::normal_distribution<T> m_distribution;
- mutable std::mt19937 m_generator;
+ std::mt19937* m_generator;  // owning raw pointer: allocated in both constructors, deleted in the destructor
};
#elif defined (EIGEN_USE_GPU) && defined(__CUDACC__) && defined(__CUDA_ARCH__)
@@ -605,11 +662,12 @@ template <> class NormalRandomGenerator<float> {
curand_init(seed, tid, 0, &m_state);
}
template<typename Index>
- __device__ float operator()(Index, Index = 0) const {
+ __device__ float operator()(Index) const {  // the index argument is unused; the value comes from curand_normal on m_state
return curand_normal(&m_state);
}
- template<typename Index>
- __device__ float4 packetOp(Index, Index = 0) const {
+ template<typename Index, typename PacketType>
+ __device__ float4 packetOp(Index) const {  // the return type stays float4 even though PacketType is a template parameter
+ EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);  // compile-time check that the caller asked for float4
return curand_normal4(&m_state);
}
@@ -634,11 +692,12 @@ template <> class NormalRandomGenerator<double> {
curand_init(seed, tid, 0, &m_state);
}
template<typename Index>
- __device__ double operator()(Index, Index = 0) const {
+ __device__ double operator()(Index) const {  // the index argument is unused; the value comes from curand_normal_double on m_state
return curand_normal_double(&m_state);
}
- template<typename Index>
- __device__ double2 packetOp(Index, Index = 0) const {
+ template<typename Index, typename PacketType>
+ __device__ double2 packetOp(Index) const {  // the return type stays double2 even though PacketType is a template parameter
+ EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE);  // compile-time check that the caller asked for double2
return curand_normal2_double(&m_state);
}
@@ -663,7 +722,7 @@ template <> class NormalRandomGenerator<std::complex<float> > {
curand_init(seed, tid, 0, &m_state);
}
template<typename Index>
- __device__ std::complex<float> operator()(Index, Index = 0) const {
+ __device__ std::complex<float> operator()(Index) const {  // the index argument is unused; only the x and y fields of the float4 drawn below are used
float4 vals = curand_normal4(&m_state);
return std::complex<float>(vals.x, vals.y);
}
@@ -689,7 +748,7 @@ template <> class NormalRandomGenerator<std::complex<double> > {
curand_init(seed, tid, 0, &m_state);
}
template<typename Index>
- __device__ std::complex<double> operator()(Index, Index = 0) const {
+ __device__ std::complex<double> operator()(Index) const {  // the index argument is unused; real and imaginary parts are the x and y fields of the double2 drawn below
double2 vals = curand_normal2_double(&m_state);
return std::complex<double>(vals.x, vals.y);
}
@@ -703,6 +762,7 @@ template <> class NormalRandomGenerator<std::complex<double> > {
template <typename T> class NormalRandomGenerator {
public:
+ static const bool PacketAccess = false;
NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {}
private:
@@ -711,6 +771,13 @@ template <typename T> class NormalRandomGenerator {
#endif
+template <typename Scalar>  // Traits specialization for NormalRandomGenerator.
+struct functor_traits<NormalRandomGenerator<Scalar> > {
+ enum {
+ PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess  // forwards the generator's own static PacketAccess flag; no Cost entry is defined here
+ };
+};
+
template <typename T, typename Index, size_t NumDims>
class GaussianGenerator {