diff options
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h | 588 |
1 files changed, 123 insertions, 465 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h index 33cd00391..7164e8d60 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h @@ -25,7 +25,7 @@ struct scalar_mod_op { }; template <typename Scalar> struct functor_traits<scalar_mod_op<Scalar> > -{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; }; +{ enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; }; /** \internal @@ -38,7 +38,7 @@ struct scalar_mod2_op { }; template <typename Scalar> struct functor_traits<scalar_mod2_op<Scalar> > -{ enum { Cost = NumTraits<Scalar>::template Div<false>::Cost, PacketAccess = false }; }; +{ enum { Cost = scalar_div_cost<Scalar,false>::value, PacketAccess = false }; }; template <typename Scalar> struct scalar_fmod_op { @@ -69,7 +69,7 @@ struct scalar_sigmoid_op { template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { - const Packet one = pset1<Packet>(1); + const Packet one = pset1<Packet>(T(1)); return pdiv(one, padd(one, pexp(pnegate(x)))); } }; @@ -84,14 +84,23 @@ struct functor_traits<scalar_sigmoid_op<T> > { }; +template<typename Reducer, typename Device> +struct reducer_traits { + enum { + Cost = 1, + PacketAccess = false + }; +}; + // Standard reduction functors template <typename T> struct SumReducer { - static const bool PacketAccess = true; + static const bool PacketAccess = packet_traits<T>::HasAdd; static const bool IsStateful = false; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { - (*accum) += t; + internal::scalar_sum_op<T> sum_op; + *accum = sum_op(*accum, t); } template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { @@ -119,16 +128,26 @@ template <typename T> struct SumReducer } }; +template <typename T, typename Device> +struct reducer_traits<SumReducer<T>, Device> { + enum { + Cost = NumTraits<T>::AddCost, + PacketAccess = PacketType<T, Device>::HasAdd + }; +}; + + template <typename T> struct MeanReducer { - static const bool PacketAccess = !NumTraits<T>::IsInteger; + static const bool PacketAccess = packet_traits<T>::HasAdd && !NumTraits<T>::IsInteger; static const bool IsStateful = true; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE MeanReducer() : scalarCount_(0), packetCount_(0) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) { - (*accum) += t; + internal::scalar_sum_op<T> sum_op; + *accum = sum_op(*accum, t); scalarCount_++; } template <typename Packet> @@ -162,9 +181,44 @@ template <typename T> struct MeanReducer DenseIndex packetCount_; }; +template <typename T, typename Device> +struct reducer_traits<MeanReducer<T>, Device> { + enum { + Cost = NumTraits<T>::AddCost, + PacketAccess = PacketType<T, Device>::HasAdd + }; +}; + + +template <typename T, bool IsMax = true, bool IsInteger = true> +struct MinMaxBottomValue { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return Eigen::NumTraits<T>::lowest(); + } +}; +template <typename T> +struct MinMaxBottomValue<T, true, false> { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return -Eigen::NumTraits<T>::infinity(); + } +}; +template <typename T> +struct MinMaxBottomValue<T, false, true> { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return Eigen::NumTraits<T>::highest(); + } +}; +template <typename T> +struct MinMaxBottomValue<T, false, false> { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE T bottom_value() { + return Eigen::NumTraits<T>::infinity(); + } +}; + + template <typename T> struct MaxReducer { - static const bool PacketAccess = true; + static const bool PacketAccess = packet_traits<T>::HasMax; static const bool IsStateful = false; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { @@ -174,9 +228,8 @@ template <typename T> struct MaxReducer EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { (*accum) = pmax<Packet>(*accum, p); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return Eigen::NumTraits<T>::lowest(); + return MinMaxBottomValue<T, true, Eigen::NumTraits<T>::IsInteger>::bottom_value(); } template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { @@ -195,9 +248,18 @@ template <typename T> struct MaxReducer } }; +template <typename T, typename Device> +struct reducer_traits<MaxReducer<T>, Device> { + enum { + Cost = NumTraits<T>::AddCost, + PacketAccess = PacketType<T, Device>::HasMax + }; +}; + + template <typename T> struct MinReducer { - static const bool PacketAccess = true; + static const bool PacketAccess = packet_traits<T>::HasMin; static const bool IsStateful = false; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { @@ -207,9 +269,8 @@ template <typename T> struct MinReducer EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) const { (*accum) = pmin<Packet>(*accum, p); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return Eigen::NumTraits<T>::highest(); + return MinMaxBottomValue<T, false, Eigen::NumTraits<T>::IsInteger>::bottom_value(); } template <typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { @@ -228,10 +289,18 @@ template <typename T> struct MinReducer } }; +template <typename T, typename Device> +struct reducer_traits<MinReducer<T>, Device> { + enum { + Cost = NumTraits<T>::AddCost, + PacketAccess = PacketType<T, Device>::HasMin + }; +}; + template <typename T> struct ProdReducer { - static const bool PacketAccess = true; + static const bool PacketAccess = packet_traits<T>::HasMul; static const bool IsStateful = false; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) const { @@ -263,6 +332,14 @@ template <typename T> struct ProdReducer } }; +template <typename T, typename Device> +struct reducer_traits<ProdReducer<T>, Device> { + enum { + Cost = NumTraits<T>::MulCost, + PacketAccess = PacketType<T, Device>::HasMul + }; +}; + struct AndReducer { @@ -280,6 +357,15 @@ struct AndReducer } }; +template <typename Device> +struct reducer_traits<AndReducer, Device> { + enum { + Cost = 1, + PacketAccess = false + }; +}; + + struct OrReducer { static const bool PacketAccess = false; static const bool IsStateful = false; @@ -295,6 +381,15 @@ struct OrReducer { } }; +template <typename Device> +struct reducer_traits<OrReducer, Device> { + enum { + Cost = 1, + PacketAccess = false + }; +}; + + // Argmin/Argmax reducers template <typename T> struct ArgMaxTupleReducer { @@ -312,6 +407,15 @@ template <typename T> struct ArgMaxTupleReducer } }; +template <typename T, typename Device> +struct reducer_traits<ArgMaxTupleReducer<T>, Device> { + enum { + Cost = NumTraits<T>::AddCost, + PacketAccess = false + }; +}; + + template <typename T> struct ArgMinTupleReducer { static const bool PacketAccess = false; @@ -328,457 +432,11 @@ template <typename T> struct ArgMinTupleReducer } }; - -// Random number generation -namespace { -#ifdef __CUDA_ARCH__ -__device__ int get_random_seed() { - return clock(); -} -#else -int get_random_seed() { -#ifdef _WIN32 - SYSTEMTIME st; - GetSystemTime(&st); - return st.wSecond + 1000 * st.wMilliseconds; -#elif defined __APPLE__ - return static_cast<int>(mach_absolute_time()); -#else - timespec ts; - clock_gettime(CLOCK_REALTIME, &ts); - return static_cast<int>(ts.tv_nsec); -#endif -} -#endif -} - -#if !defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__) -// We're not compiling a cuda kernel -template <typename T> class UniformRandomGenerator { - - public: - static const bool PacketAccess = true; - - UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - if (!deterministic) { - srand(get_random_seed()); - } - } - UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - } - - template<typename Index> - T operator()(Index) const { - return random<T>(); - } - template<typename Index, typename PacketType> - PacketType packetOp(Index) const { - const int packetSize = internal::unpacket_traits<PacketType>::size; - EIGEN_ALIGN_MAX T values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = random<T>(); - } - return internal::pload<PacketType>(values); - } - - private: - bool m_deterministic; -}; - -#if __cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900 -template <> class UniformRandomGenerator<float> { - public: - static const bool PacketAccess = true; - - UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_generator(new std::mt19937()) { - if (!deterministic) { - m_generator->seed(get_random_seed()); - } - } - UniformRandomGenerator(const UniformRandomGenerator<float>& other) { - m_generator = new std::mt19937(); - m_generator->seed(other(0) * UINT_MAX); - m_deterministic = other.m_deterministic; - } - ~UniformRandomGenerator() { - delete m_generator; - } - - template<typename Index> - float operator()(Index) const { - return m_distribution(*m_generator); - } - template<typename Index, typename PacketType> - PacketType packetOp(Index i) const { - const int packetSize = internal::unpacket_traits<PacketType>::size; - EIGEN_ALIGN_MAX float values[packetSize]; - for (int k = 0; k < packetSize; ++k) { - values[k] = this->operator()(i); - } - return internal::pload<PacketType>(values); - } - - private: - UniformRandomGenerator& operator = (const UniformRandomGenerator&); - // Make sure m_deterministic comes first to match the layout of the cpu - // version of the code. - bool m_deterministic; - std::mt19937* m_generator; - mutable std::uniform_real_distribution<float> m_distribution; -}; - -template <> class UniformRandomGenerator<double> { - public: - static const bool PacketAccess = true; - - UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_generator(new std::mt19937()) { - if (!deterministic) { - m_generator->seed(get_random_seed()); - } - } - UniformRandomGenerator(const UniformRandomGenerator<double>& other) { - m_generator = new std::mt19937(); - m_generator->seed(other(0) * UINT_MAX); - m_deterministic = other.m_deterministic; - } - ~UniformRandomGenerator() { - delete m_generator; - } - - template<typename Index> - double operator()(Index) const { - return m_distribution(*m_generator); - } - template<typename Index, typename PacketType> - PacketType packetOp(Index i) const { - const int packetSize = internal::unpacket_traits<PacketType>::size; - EIGEN_ALIGN_MAX double values[packetSize]; - for (int k = 0; k < packetSize; ++k) { - values[k] = this->operator()(i); - } - return internal::pload<PacketType>(values); - } - - private: - UniformRandomGenerator& operator = (const UniformRandomGenerator&); - // Make sure m_deterministic comes first to match the layout of the cpu - // version of the code. - bool m_deterministic; - std::mt19937* m_generator; - mutable std::uniform_real_distribution<double> m_distribution; -}; -#endif - -#else - -// We're compiling a cuda kernel -template <typename T> class UniformRandomGenerator; - -template <> class UniformRandomGenerator<float> { - public: - static const bool PacketAccess = true; - - __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - - template<typename Index> - __device__ float operator()(Index) const { - return curand_uniform(&m_state); - } - template<typename Index, typename PacketType> - __device__ float4 packetOp(Index) const { - EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return curand_uniform4(&m_state); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class UniformRandomGenerator<double> { - public: - static const bool PacketAccess = true; - - __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template<typename Index> - __device__ double operator()(Index) const { - return curand_uniform_double(&m_state); - } - template<typename Index, typename PacketType> - __device__ double2 packetOp(Index) const { - EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return curand_uniform2_double(&m_state); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class UniformRandomGenerator<std::complex<float> > { - public: - static const bool PacketAccess = false; - - __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template<typename Index> - __device__ std::complex<float> operator()(Index) const { - float4 vals = curand_uniform4(&m_state); - return std::complex<float>(vals.x, vals.y); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class UniformRandomGenerator<std::complex<double> > { - public: - static const bool PacketAccess = false; - - __device__ UniformRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ UniformRandomGenerator(const UniformRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template<typename Index> - __device__ std::complex<double> operator()(Index) const { - double2 vals = curand_uniform2_double(&m_state); - return std::complex<double>(vals.x, vals.y); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -#endif - -template <typename Scalar> -struct functor_traits<UniformRandomGenerator<Scalar> > { - enum { - // Rough estimate. - Cost = 100 * NumTraits<Scalar>::MulCost, - PacketAccess = UniformRandomGenerator<Scalar>::PacketAccess - }; -}; - - - -#if (!defined (EIGEN_USE_GPU) || !defined(__CUDACC__) || !defined(__CUDA_ARCH__)) && (__cplusplus > 199711 || EIGEN_COMP_MSVC >= 1900) -// We're not compiling a cuda kernel -template <typename T> class NormalRandomGenerator { - public: - static const bool PacketAccess = true; - - NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic), m_distribution(0, 1), m_generator(new std::mt19937()) { - if (!deterministic) { - m_generator->seed(get_random_seed()); - } - } - NormalRandomGenerator(const NormalRandomGenerator& other) - : m_deterministic(other.m_deterministic), m_distribution(other.m_distribution), m_generator(new std::mt19937()) { - m_generator->seed(other(0) * UINT_MAX); - } - ~NormalRandomGenerator() { - delete m_generator; - } - template<typename Index> - T operator()(Index) const { - return m_distribution(*m_generator); - } - template<typename Index, typename PacketType> - PacketType packetOp(Index) const { - const int packetSize = internal::unpacket_traits<PacketType>::size; - EIGEN_ALIGN_MAX T values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = m_distribution(*m_generator); - } - return internal::pload<PacketType>(values); - } - - private: - // No assignment - NormalRandomGenerator& operator = (const NormalRandomGenerator&); - - bool m_deterministic; - mutable std::normal_distribution<T> m_distribution; - std::mt19937* m_generator; -}; - -#elif defined (EIGEN_USE_GPU) && defined(__CUDACC__) && defined(__CUDA_ARCH__) - -// We're compiling a cuda kernel -template <typename T> class NormalRandomGenerator; - -template <> class NormalRandomGenerator<float> { - public: - static const bool PacketAccess = true; - - __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator<float>& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template<typename Index> - __device__ float operator()(Index) const { - return curand_normal(&m_state); - } - template<typename Index, typename PacketType> - __device__ float4 packetOp(Index) const { - EIGEN_STATIC_ASSERT((is_same<PacketType, float4>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return curand_normal4(&m_state); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class NormalRandomGenerator<double> { - public: - static const bool PacketAccess = true; - - __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator<double>& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template<typename Index> - __device__ double operator()(Index) const { - return curand_normal_double(&m_state); - } - template<typename Index, typename PacketType> - __device__ double2 packetOp(Index) const { - EIGEN_STATIC_ASSERT((is_same<PacketType, double2>::value), YOU_MADE_A_PROGRAMMING_MISTAKE); - return curand_normal2_double(&m_state); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class NormalRandomGenerator<std::complex<float> > { - public: - static const bool PacketAccess = false; - - __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template<typename Index> - __device__ std::complex<float> operator()(Index) const { - float4 vals = curand_normal4(&m_state); - return std::complex<float>(vals.x, vals.y); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -template <> class NormalRandomGenerator<std::complex<double> > { - public: - static const bool PacketAccess = false; - - __device__ NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) { - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - __device__ NormalRandomGenerator(const NormalRandomGenerator& other) { - m_deterministic = other.m_deterministic; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const int seed = m_deterministic ? 0 : get_random_seed(); - curand_init(seed, tid, 0, &m_state); - } - template<typename Index> - __device__ std::complex<double> operator()(Index) const { - double2 vals = curand_normal2_double(&m_state); - return std::complex<double>(vals.x, vals.y); - } - - private: - bool m_deterministic; - mutable curandStatePhilox4_32_10_t m_state; -}; - -#else - -template <typename T> class NormalRandomGenerator { - public: - static const bool PacketAccess = false; - NormalRandomGenerator(bool deterministic = true) : m_deterministic(deterministic) {} - - private: - bool m_deterministic; -}; - -#endif - -template <typename Scalar> -struct functor_traits<NormalRandomGenerator<Scalar> > { +template <typename T, typename Device> +struct reducer_traits<ArgMinTupleReducer<T>, Device> { enum { - // Rough estimate. - Cost = 100 * NumTraits<Scalar>::MulCost, - PacketAccess = NormalRandomGenerator<Scalar>::PacketAccess + Cost = NumTraits<T>::AddCost, + PacketAccess = false }; }; @@ -797,7 +455,7 @@ class GaussianGenerator { } } - T operator()(const array<Index, NumDims>& coordinates) const { + EIGEN_DEVICE_FUNC T operator()(const array<Index, NumDims>& coordinates) const { T tmp = T(0); for (size_t i = 0; i < NumDims; ++i) { T offset = coordinates[i] - m_means[i]; |