diff options
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h | 207 |
1 files changed, 137 insertions, 70 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 34ba4e392..b7c13f67f 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -25,7 +25,34 @@ struct scalar_mod_op { }; template <typename Scalar> struct functor_traits<scalar_mod_op<Scalar> > -{ enum { Cost = 2 * NumTraits<Scalar>::MulCost, PacketAccess = false }; }; +{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; }; + + +/** \internal + * \brief Template functor to compute the modulo between 2 arrays. + */ +template <typename Scalar> +struct scalar_mod2_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_mod2_op); + EIGEN_DEVICE_FUNC inline Scalar operator() (const Scalar& a, const Scalar& b) const { return a % b; } +}; +template <typename Scalar> +struct functor_traits<scalar_mod2_op<Scalar> > +{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; }; + +template <typename Scalar> +struct scalar_fmod_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_fmod_op); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar + operator()(const Scalar& a, const Scalar& b) const { + return numext::fmod(a, b); + } +}; +template <typename Scalar> +struct functor_traits<scalar_fmod_op<Scalar> > { + enum { Cost = 13, // Reciprocal throughput of FPREM on Haswell. + PacketAccess = false }; +}; /** \internal @@ -72,11 +99,12 @@ template <typename T> struct SumReducer } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return static_cast<T>(0); + internal::scalar_cast_op<int, T> conv; + return conv(0); } template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { - return pset1<Packet>(0); + return pset1<Packet>(initialize()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { return accum; @@ -93,7 +121,7 @@ template <typename T> struct SumReducer template <typename T> struct MeanReducer { - static const bool PacketAccess = true; + static const bool PacketAccess = !NumTraits<T>::IsInteger; static const bool IsStateful = true; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -110,11 +138,12 @@ template <typename T> struct MeanReducer } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return static_cast<T>(0); + internal::scalar_cast_op<int, T> conv; + return conv(0); } template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { - return pset1<Packet>(0); + return pset1<Packet>(initialize()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { return accum / scalarCount_; @@ -147,11 +176,11 @@ template <typename T> struct MaxReducer } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return -(std::numeric_limits<T>::max)(); + return Eigen::NumTraits<T>::lowest(); } template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { - return pset1<Packet>(-(std::numeric_limits<T>::max)()); + return pset1<Packet>(initialize()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { return accum; @@ -180,11 +209,11 @@ template <typename T> struct MinReducer } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return (std::numeric_limits<T>::max)(); + return Eigen::NumTraits<T>::highest(); } template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { - return pset1<Packet>((std::numeric_limits<T>::max)()); + return pset1<Packet>(initialize()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { return accum; @@ -214,11 +243,12 @@ template <typename T> struct ProdReducer } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return static_cast<T>(1); + internal::scalar_cast_op<int, T> conv; + return conv(1); } template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { - return pset1<Packet>(1); + return pset1<Packet>(initialize()); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { return accum; @@ -237,6 +267,8 @@ template <typename T> struct ProdReducer struct AndReducer { static const bool PacketAccess = false; + static const bool IsStateful = false; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const { *accum = *accum && t; } @@ -250,6 +282,8 @@ struct AndReducer struct OrReducer { static const bool PacketAccess = false; + static const bool IsStateful = false; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(bool t, bool* accum) const { *accum = *accum || t; } @@ -335,50 +369,54 @@ template <typename T> class UniformRandomGenerator { } template<typename Index> - T operator()(Index, Index = 0) const { + T operator()(Index) const { return random<T>(); } - template<typename Index> - typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const { - const int packetSize = internal::packet_traits<T>::size; + template<typename Index, typename PacketType> + PacketType packetOp(Index) const { + const int packetSize = internal::unpacket_traits<PacketType>::size; EIGEN_ALIGN_MAX T values[packetSize]; for (int i = 0; i < packetSize; ++i) { values[i] = random<T>(); } - return internal::pload<typename internal::packet_traits<T>::type>(values); + return internal::pload<PacketType>(values); } private: bool m_deterministic; }; -#if __cplusplus > 199711 +#if __cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900 template <> class UniformRandomGenerator<float> { public: static const bool PacketAccess = true; - UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { + UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_generator(new std::mt19937()) { if (!deterministic) { - m_generator.seed(get_random_seed()); + m_generator->seed(get_random_seed()); } } UniformRandomGenerator(const UniformRandomGenerator<float>& other) { - m_generator.seed(other(0, 0) * UINT_MAX); + m_generator = new std::mt19937(); + m_generator->seed(other(0) * UINT_MAX); m_deterministic = other.m_deterministic; } + ~UniformRandomGenerator() { + delete m_generator; + } template<typename Index> - float operator()(Index, Index = 0) const { - return m_distribution(m_generator); + float operator()(Index) const { + return m_distribution(*m_generator); } - template<typename Index> - typename internal::packet_traits<float>::type packetOp(Index i, Index j = 0) const { - const int packetSize = internal::packet_traits<float>::size; + template<typename Index, typename PacketType> + PacketType packetOp(Index i) const { + const int packetSize = internal::unpacket_traits<PacketType>::size; EIGEN_ALIGN_MAX float values[packetSize]; for (int k = 0; k < packetSize; ++k) { - values[k] = this->operator()(i, j); + values[k] = this->operator()(i); } - return internal::pload<typename internal::packet_traits<float>::type>(values); + return internal::pload<PacketType>(values); } private: @@ -386,7 +424,7 @@ template <> class UniformRandomGenerator<float> { // Make sure m_deterministic comes first to match the layout of the cpu // version of the code. bool m_deterministic; - mutable std::mt19937 m_generator; + std::mt19937* m_generator; mutable std::uniform_real_distribution<float> m_distribution; }; @@ -394,28 +432,32 @@ template <> class UniformRandomGenerator<double> { public: static const bool PacketAccess = true; - UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { + UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_generator(new std::mt19937()) { if (!deterministic) { - m_generator.seed(get_random_seed()); + m_generator->seed(get_random_seed()); } } UniformRandomGenerator(const UniformRandomGenerator<double>& other) { - m_generator.seed(other(0, 0) * UINT_MAX); + m_generator = new std::mt19937(); + m_generator->seed(other(0) * UINT_MAX); m_deterministic = other.m_deterministic; } + ~UniformRandomGenerator() { + delete m_generator; + } template<typename Index> - double operator()(Index, Index = 0) const { - return m_distribution(m_generator); + double operator()(Index) const { + return m_distribution(*m_generator); } - template<typename Index> - typename internal::packet_traits<double>::type packetOp(Index i, Index j = 0) const { - const int packetSize = internal::packet_traits<double>::size; + template<typename Index, typename PacketType> + PacketType packetOp(Index i) const { + const int packetSize = internal::unpacket_traits<PacketType>::size; EIGEN_ALIGN_MAX double values[packetSize]; for (int k = 0; k < packetSize; ++k) { - values[k] = this->operator()(i, j); + values[k] = this->operator()(i); } - return internal::pload<typename internal::packet_traits<double>::type>(values); + return internal::pload<PacketType>(values); } private: @@ -423,7 +465,7 @@ template <> class UniformRandomGenerator<double> { // Make sure m_deterministic comes first to match the layout of the cpu // version of the code. bool m_deterministic; - mutable std::mt19937 m_generator; + std::mt19937* m_generator; mutable std::uniform_real_distribution<double> m_distribution; }; #endif @@ -451,11 +493,12 @@ template <> class UniformRandomGenerator<float> { } template<typename Index> - __device__ float operator()(Index, Index = 0) const { + __device__ float operator()(Index) const { return curand_uniform(&m_state); } - template<typename Index> - __device__ float4 packetOp(Index, Index = 0) const { + template<typename Index, typename PacketType> + __device__ float4 packetOp(Index) const { + EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); return curand_uniform4(&m_state); } @@ -480,11 +523,12 @@ template <> class UniformRandomGenerator<double> { curand_init(seed, tid, 0, &m_state); } template<typename Index> - __device__ double operator()(Index, Index = 0) const { + __device__ double operator()(Index) const { return curand_uniform_double(&m_state); } - template<typename Index> - __device__ double2 packetOp(Index, Index = 0) const { + template<typename Index, typename PacketType> + __device__ double2 packetOp(Index) const { + EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); return curand_uniform2_double(&m_state); } @@ -509,7 +553,7 @@ template <> class UniformRandomGenerator<std::complex<float> > { curand_init(seed, tid, 0, &m_state); } template<typename Index> - __device__ std::complex<float> operator()(Index, Index = 0) const { + __device__ std::complex<float> operator()(Index) const { float4 vals = curand_uniform4(&m_state); return std::complex<float>(vals.x, vals.y); } @@ -535,7 +579,7 @@ template <> class UniformRandomGenerator<std::complex<double> > { curand_init(seed, tid, 0, &m_state); } template<typename Index> - __device__ std::complex<double> operator()(Index, Index = 0) const { + __device__ std::complex<double> operator()(Index) const { double2 vals = curand_uniform2_double(&m_state); return std::complex<double>(vals.x, vals.y); } @@ -547,41 +591,54 @@ template <> class UniformRandomGenerator<std::complex<double> > { #endif +template <typename Scalar> +struct functor_traits<UniformRandomGenerator<Scalar> > { + enum { + PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess + }; +}; + -#if (!defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)) && __cplusplus > 199711 + +#if (!defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)) && (__cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900) // We're not compiling a cuda kernel template <typename T> class NormalRandomGenerator { public: static const bool PacketAccess = true; - NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_distribution(0, 1) { + NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_distribution(0, 1), m_generator(new std::mt19937()) { if (!deterministic) { - m_generator.seed(get_random_seed()); + m_generator->seed(get_random_seed()); } } NormalRandomGenerator(const NormalRandomGenerator& other) - : m_deterministic(other.m_deterministic), m_distribution(other.m_distribution) { - m_generator.seed(other(0, 0) * UINT_MAX); + : m_deterministic(other.m_deterministic), m_distribution(other.m_distribution), m_generator(new std::mt19937()) { + m_generator->seed(other(0) * UINT_MAX); } - - template<typename Index> - T operator()(Index, Index = 0) const { - return m_distribution(m_generator); + ~NormalRandomGenerator() { + delete m_generator; } template<typename Index> - typename internal::packet_traits<T>::type packetOp(Index, Index = 0) const { - const int packetSize = internal::packet_traits<T>::size; + T operator()(Index) const { + return m_distribution(*m_generator); + } + template<typename Index, typename PacketType> + PacketType packetOp(Index) const { + const int packetSize = internal::unpacket_traits<PacketType>::size; EIGEN_ALIGN_MAX T values[packetSize]; for (int i = 0; i < packetSize; ++i) { - values[i] = m_distribution(m_generator); + values[i] = m_distribution(*m_generator); } - return internal::pload<typename internal::packet_traits<T>::type>(values); + return internal::pload<PacketType>(values); } private: + // No assignment + NormalRandomGenerator& operator = (const NormalRandomGenerator&); + bool m_deterministic; mutable std::normal_distribution<T> m_distribution; - mutable std::mt19937 m_generator; + std::mt19937* m_generator; }; #elif defined (EIGEN_USE_GPU) && defined(__CUDACC__) && defined(__CUDA_ARCH__) @@ -605,11 +662,12 @@ template <> class NormalRandomGenerator<float> { curand_init(seed, tid, 0, &m_state); } template<typename Index> - __device__ float operator()(Index, Index = 0) const { + __device__ float operator()(Index) const { return curand_normal(&m_state); } - template<typename Index> - __device__ float4 packetOp(Index, Index = 0) const { + template<typename Index, typename PacketType> + __device__ float4 packetOp(Index) const { + EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); return curand_normal4(&m_state); } @@ -634,11 +692,12 @@ template <> class NormalRandomGenerator<double> { curand_init(seed, tid, 0, &m_state); } template<typename Index> - __device__ double operator()(Index, Index = 0) const { + __device__ double operator()(Index) const { return curand_normal_double(&m_state); } - template<typename Index> - __device__ double2 packetOp(Index, Index = 0) const { + template<typename Index, typename PacketType> + __device__ double2 packetOp(Index) const { + EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); return curand_normal2_double(&m_state); } @@ -663,7 +722,7 @@ template <> class NormalRandomGenerator<std::complex<float> > { curand_init(seed, tid, 0, &m_state); } template<typename Index> - __device__ std::complex<float> operator()(Index, Index = 0) const { + __device__ std::complex<float> operator()(Index) const { float4 vals = curand_normal4(&m_state); return std::complex<float>(vals.x, vals.y); } @@ -689,7 +748,7 @@ template <> class NormalRandomGenerator<std::complex<double> > { curand_init(seed, tid, 0, &m_state); } template<typename Index> - __device__ std::complex<double> operator()(Index, Index = 0) const { + __device__ std::complex<double> operator()(Index) const { double2 vals = curand_normal2_double(&m_state); return std::complex<double>(vals.x, vals.y); } @@ -703,6 +762,7 @@ template <> class NormalRandomGenerator<std::complex<double> > { template <typename T> class NormalRandomGenerator { public: + static const bool PacketAccess = false; NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {} private: @@ -711,6 +771,13 @@ template <typename T> class NormalRandomGenerator { #endif +template <typename Scalar> +struct functor_traits<NormalRandomGenerator<Scalar> > { + enum { + PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess + }; +}; + template <typename T, typename Index, size_t NumDims> class GaussianGenerator { |