Diffstat (limited to 'unsupported/Eigen/CXX11/src')
16 files changed, 117 insertions, 96 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 142778df8..19d2b50b5 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -817,8 +817,8 @@ class TensorBase<Derived, ReadOnlyAccessors>
     EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast<const Derived*>(this); }
 };
 
-template<typename Derived>
-class TensorBase<Derived, WriteAccessors> : public TensorBase<Derived, ReadOnlyAccessors> {
+template<typename Derived, int AccessLevel = internal::accessors_level<Derived>::value>
+class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
  public:
     typedef internal::traits<Derived> DerivedTraits;
     typedef typename DerivedTraits::Scalar Scalar;
@@ -828,7 +828,7 @@ class TensorBase<Derived, WriteAccessors> : public TensorBase<Derived, ReadOnlyA
 
     template <typename Scalar, int NumIndices, int Options, typename IndexType> friend class Tensor;
     template <typename Scalar, typename Dimensions, int Option, typename IndexTypes> friend class TensorFixedSize;
-    template <typename OtherDerived, int AccessLevel> friend class TensorBase;
+    template <typename OtherDerived, int OtherAccessLevel> friend class TensorBase;
 
     EIGEN_DEVICE_FUNC
     EIGEN_STRONG_INLINE Derived& setZero() {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
index 886474986..d65dbb40f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionCuda.h
@@ -461,8 +461,8 @@ EigenContractionKernelInternal(const LhsMapper lhs, const RhsMapper rhs,
 #undef writeResultShmem
 #undef writeRow
 
-  const int max_i_write = (min)((int)((m_size - base_m - threadIdx.y + 7) / 8), 8);
-  const int max_j_write = (min)((int)((n_size - base_n - threadIdx.z + 7) / 8), 8);
+  const int max_i_write = numext::mini((int)((m_size - base_m - threadIdx.y + 7) / 8), 8);
+  const int max_j_write = numext::mini((int)((n_size - base_n - threadIdx.z + 7) / 8), 8);
 
   if (threadIdx.x < max_i_write) {
     if (max_j_write == 8) {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h
index b27e1a1b4..9b2cb3ff6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h
@@ -130,19 +130,19 @@ class SimpleTensorContractionMapper {
     }
 
     Index contract_val = left ? col : row;
-    for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
-      const Index idx = contract_val / m_k_strides[i];
-      linidx += idx * m_contract_strides[i];
-      contract_val -= idx * m_k_strides[i];
-    }
+    if(array_size<contract_t>::value > 0) {
+      for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
+        const Index idx = contract_val / m_k_strides[i];
+        linidx += idx * m_contract_strides[i];
+        contract_val -= idx * m_k_strides[i];
+      }
 
-    if (side == Rhs && inner_dim_contiguous) {
-      eigen_assert(m_contract_strides[0] == 1);
-      linidx += contract_val;
-    } else {
-      linidx += contract_val * m_contract_strides[0];
+      if (side == Rhs && inner_dim_contiguous) {
+        eigen_assert(m_contract_strides[0] == 1);
+        linidx += contract_val;
+      } else {
+        linidx += contract_val * m_contract_strides[0];
+      }
     }
 
     return linidx;
@@ -153,15 +153,15 @@ class SimpleTensorContractionMapper {
     const bool left = (side == Lhs);
     Index nocontract_val[2] = {left ? row : col, left ? row + distance : col};
     Index linidx[2] = {0, 0};
-    for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) {
-      const Index idx0 = nocontract_val[0] / m_ij_strides[i];
-      const Index idx1 = nocontract_val[1] / m_ij_strides[i];
-      linidx[0] += idx0 * m_nocontract_strides[i];
-      linidx[1] += idx1 * m_nocontract_strides[i];
-      nocontract_val[0] -= idx0 * m_ij_strides[i];
-      nocontract_val[1] -= idx1 * m_ij_strides[i];
-    }
     if (array_size<typename Tensor::Dimensions>::value > array_size<contract_t>::value) {
+      for (int i = static_cast<int>(array_size<nocontract_t>::value) - 1; i > 0; i--) {
+        const Index idx0 = nocontract_val[0] / m_ij_strides[i];
+        const Index idx1 = nocontract_val[1] / m_ij_strides[i];
+        linidx[0] += idx0 * m_nocontract_strides[i];
+        linidx[1] += idx1 * m_nocontract_strides[i];
+        nocontract_val[0] -= idx0 * m_ij_strides[i];
+        nocontract_val[1] -= idx1 * m_ij_strides[i];
+      }
       if (side == Lhs && inner_dim_contiguous) {
         eigen_assert(m_nocontract_strides[0] == 1);
         linidx[0] += nocontract_val[0];
@@ -173,22 +173,24 @@ class SimpleTensorContractionMapper {
     }
 
     Index contract_val[2] = {left ? col : row, left ? col : row + distance};
-    for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
-      const Index idx0 = contract_val[0] / m_k_strides[i];
-      const Index idx1 = contract_val[1] / m_k_strides[i];
-      linidx[0] += idx0 * m_contract_strides[i];
-      linidx[1] += idx1 * m_contract_strides[i];
-      contract_val[0] -= idx0 * m_k_strides[i];
-      contract_val[1] -= idx1 * m_k_strides[i];
-    }
+    if (array_size<contract_t>::value> 0) {
+      for (int i = static_cast<int>(array_size<contract_t>::value) - 1; i > 0; i--) {
+        const Index idx0 = contract_val[0] / m_k_strides[i];
+        const Index idx1 = contract_val[1] / m_k_strides[i];
+        linidx[0] += idx0 * m_contract_strides[i];
+        linidx[1] += idx1 * m_contract_strides[i];
+        contract_val[0] -= idx0 * m_k_strides[i];
+        contract_val[1] -= idx1 * m_k_strides[i];
+      }
 
-    if (side == Rhs && inner_dim_contiguous) {
-      eigen_assert(m_contract_strides[0] == 1);
-      linidx[0] += contract_val[0];
-      linidx[1] += contract_val[1];
-    } else {
-      linidx[0] += contract_val[0] * m_contract_strides[0];
-      linidx[1] += contract_val[1] * m_contract_strides[0];
+      if (side == Rhs && inner_dim_contiguous) {
+        eigen_assert(m_contract_strides[0] == 1);
+        linidx[0] += contract_val[0];
+        linidx[1] += contract_val[1];
+      } else {
+        linidx[0] += contract_val[0] * m_contract_strides[0];
+        linidx[1] += contract_val[1] * m_contract_strides[0];
+      }
     }
     return IndexPair<Index>(linidx[0], linidx[1]);
   }
@@ -200,7 +202,7 @@ class SimpleTensorContractionMapper {
     return (Alignment == Aligned) && (side == Lhs) && inner_dim_contiguous ? 0 : size;
   }
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index stride() const {
-    return ((side == Lhs) && inner_dim_contiguous) ? m_contract_strides[0] : 1;
+    return ((side == Lhs) && inner_dim_contiguous && array_size<contract_t>::value > 0) ? m_contract_strides[0] : 1;
   }
 
  protected:
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index 34270730b..069680a11 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -151,9 +151,7 @@ struct ThreadPoolDevice {
   template <class Function, class... Args>
   EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... args) const {
     Notification* n = new Notification();
-    std::function<void()> func =
-      std::bind(&FunctionWrapperWithNotification<Function, Args...>::run, n, f, args...);
-    pool_->Schedule(func);
+    pool_->Schedule(std::bind(&FunctionWrapperWithNotification<Function, Args...>::run, n, f, args...));
     return n;
   }
 
@@ -161,15 +159,13 @@ struct ThreadPoolDevice {
   EIGEN_STRONG_INLINE void enqueue_with_barrier(Barrier* b, Function&& f,
                                                 Args&&... args) const {
-    std::function<void()> func = std::bind(
-        &FunctionWrapperWithBarrier<Function, Args...>::run, b, f, args...);
-    pool_->Schedule(func);
+    pool_->Schedule(std::bind(
+        &FunctionWrapperWithBarrier<Function, Args...>::run, b, f, args...));
   }
 
   template <class Function, class... Args>
   EIGEN_STRONG_INLINE void enqueueNoNotification(Function&& f, Args&&... args) const {
-    std::function<void()> func = std::bind(f, args...);
-    pool_->Schedule(func);
+    pool_->Schedule(std::bind(f, args...));
   }
 
   // Returns a logical thread index between 0 and pool_->NumThreads() - 1 if
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
index 26b1f65a8..a08dfa7c3 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
@@ -94,7 +94,7 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
 
   enum {
-    IsAligned = true,
+    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
     PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
     Layout = TensorEvaluator<ArgType, Device>::Layout,
     CoordAccess = false,  // to be implemented
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index a48cb1daa..c2a327bf0 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -131,7 +131,7 @@ double loadConstant(const double* address) {
 }
 template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 Eigen::half loadConstant(const Eigen::half* address) {
-  return Eigen::half(internal::raw_uint16_to_half(__ldg(&address->x)));
+  return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x)));
 }
 #endif
 }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index ad5c97b57..a116bf17f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -159,7 +159,6 @@ class TensorExecutor<Expression, ThreadPoolDevice, Vectorizable> {
 #else
     size_t num_threads = device.numThreads();
     if (num_threads > 1) {
-      cost = evaluator.costPerCoeff(Vectorizable)
       num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
           size, evaluator.costPerCoeff(Vectorizable), num_threads);
     }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
index ece2ed91b..08eb5595a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h
@@ -329,7 +329,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
 
     for (Index i = 0; i < n; ++i) {
       if(FFTDir == FFT_FORWARD) {
-        a[i] = data[i] * std::conj(pos_j_base_powered[i]);
+        a[i] = data[i] * numext::conj(pos_j_base_powered[i]);
       }
       else {
         a[i] = data[i] * pos_j_base_powered[i];
@@ -344,7 +344,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
         b[i] = pos_j_base_powered[i];
       }
       else {
-        b[i] = std::conj(pos_j_base_powered[i]);
+        b[i] = numext::conj(pos_j_base_powered[i]);
       }
     }
     for (Index i = n; i < m - n; ++i) {
@@ -355,7 +355,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
         b[i] = pos_j_base_powered[m-i];
       }
       else {
-        b[i] = std::conj(pos_j_base_powered[m-i]);
+        b[i] = numext::conj(pos_j_base_powered[m-i]);
       }
     }
 
@@ -379,7 +379,7 @@ struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, D
 
     for (Index i = 0; i < n; ++i) {
       if(FFTDir == FFT_FORWARD) {
-        data[i] = a[i] * std::conj(pos_j_base_powered[i]);
+        data[i] = a[i] * numext::conj(pos_j_base_powered[i]);
       }
       else {
         data[i] = a[i] * pos_j_base_powered[i];
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index f35275ffb..490ddd8bd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -16,7 +16,7 @@ template<typename Scalar_, int NumIndices_, int Options_ = 0, typename IndexType
 template<typename Scalar_, typename Dimensions, int Options_ = 0, typename IndexType = DenseIndex> class TensorFixedSize;
 template<typename PlainObjectType, int Options_ = Unaligned> class TensorMap;
 template<typename PlainObjectType> class TensorRef;
-template<typename Derived, int AccessLevel = internal::accessors_level<Derived>::value> class TensorBase;
+template<typename Derived, int AccessLevel> class TensorBase;
 
 template<typename NullaryOp, typename PlainObjectType> class TensorCwiseNullaryOp;
 template<typename UnaryOp, typename XprType> class TensorCwiseUnaryOp;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
index f3a3a1b88..a901c5dd4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
@@ -13,11 +13,6 @@ namespace Eigen {
 
 namespace internal {
 
-template<>
-struct significant_decimals_impl<std::string>
-  : significant_decimals_default_impl<std::string, true>
-{};
-
 // Print the tensor as a 2d matrix
 template <typename Tensor, int Rank>
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
index 33c6c1b0f..ede3939c2 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIntDiv.h
@@ -29,25 +29,47 @@ namespace Eigen {
 namespace internal {
 
 namespace {
+
+  // Note: result is undefined if val == 0
   template <typename T>
-  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE int count_leading_zeros(const T val)
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+  typename internal::enable_if<sizeof(T)==4,int>::type count_leading_zeros(const T val)
   {
 #ifdef __CUDA_ARCH__
-    return (sizeof(T) == 8) ? __clzll(val) : __clz(val);
+    return __clz(val);
 #elif EIGEN_COMP_MSVC
-    unsigned long index;
-    if (sizeof(T) == 8) {
-      _BitScanReverse64(&index, val);
-    } else {
-      _BitScanReverse(&index, val);
-    }
-    return (sizeof(T) == 8) ? 63 - index : 31 - index;
+    unsigned long index;
+    _BitScanReverse(&index, val);
+    return 31 - index;
+#else
+    EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE);
+    return __builtin_clz(static_cast<uint32_t>(val));
+#endif
+  }
+
+  template <typename T>
+  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
+  typename internal::enable_if<sizeof(T)==8,int>::type count_leading_zeros(const T val)
+  {
+#ifdef __CUDA_ARCH__
+    return __clzll(val);
+#elif EIGEN_COMP_MSVC && EIGEN_ARCH_x86_64
+    unsigned long index;
+    _BitScanReverse64(&index, val);
+    return 63 - index;
+#elif EIGEN_COMP_MSVC
+    // MSVC's _BitScanReverse64 is not available for 32bits builds.
+    unsigned int lo = (unsigned int)(val&0xffffffff);
+    unsigned int hi = (unsigned int)((val>>32)&0xffffffff);
+    int n;
+    if(hi==0)
+      n = 32 + count_leading_zeros<unsigned int>(lo);
+    else
+      n = count_leading_zeros<unsigned int>(hi);
+    return n;
 #else
     EIGEN_STATIC_ASSERT(sizeof(unsigned long long) == 8, YOU_MADE_A_PROGRAMMING_MISTAKE);
-    return (sizeof(T) == 8) ?
-      __builtin_clzll(static_cast<uint64_t>(val)) :
-      __builtin_clz(static_cast<uint32_t>(val));
+    return __builtin_clzll(static_cast<uint64_t>(val));
 #endif
   }
@@ -98,7 +120,9 @@ namespace {
       return static_cast<uint64_t>((static_cast<__uint128_t>(1) << (64+log_div)) / static_cast<__uint128_t>(divider) - (static_cast<__uint128_t>(1) << 64) + 1);
 #else
       const uint64_t shift = 1ULL << log_div;
-      TensorUInt128<uint64_t, uint64_t> result = (TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider) - TensorUInt128<static_val<1>, static_val<0> >(1, 0) + TensorUInt128<static_val<0>, static_val<1> >(1));
+      TensorUInt128<uint64_t, uint64_t> result = TensorUInt128<uint64_t, static_val<0> >(shift, 0) / TensorUInt128<static_val<0>, uint64_t>(divider)
+                                               - TensorUInt128<static_val<1>, static_val<0> >(1, 0)
+                                               + TensorUInt128<static_val<0>, static_val<1> >(1);
       return static_cast<uint64_t>(result);
 #endif
     }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 04ba45a8f..0e1576954 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -264,7 +264,7 @@ struct FullReducer<Self, Op, ThreadPoolDevice, Vectorizable> {
     const Index numblocks = blocksize > 0 ? num_coeffs / blocksize : 0;
     eigen_assert(num_coeffs >= numblocks * blocksize);
 
-    Barrier barrier(numblocks);
+    Barrier barrier(internal::convert_index<unsigned int>(numblocks));
     MaxSizeVector<typename Self::CoeffReturnType> shards(numblocks, reducer.initialize());
     for (Index i = 0; i < numblocks; ++i) {
       device.enqueue_with_barrier(&barrier, &FullReducerShard<Self, Op, Vectorizable>::run,
@@ -492,7 +492,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
     }
 
     // Attempt to use an optimized reduction.
-    else if (RunningOnGPU && data && (m_device.majorDeviceVersion() >= 3)) {
+    else if (RunningOnGPU && (m_device.majorDeviceVersion() >= 3)) {
       bool reducing_inner_dims = true;
       for (int i = 0; i < NumReducedDims; ++i) {
         if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
@@ -505,8 +505,12 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
           (reducing_inner_dims || ReducingInnerMostDims)) {
         const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
         const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
+        if (!data && num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve) {
+          data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
+          m_result = data;
+        }
         Op reducer(m_reducer);
-        return internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve);
+        return internal::InnerReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve) || (m_result != NULL);
       }
 
       bool preserving_inner_dims = true;
@@ -521,8 +525,12 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
           preserving_inner_dims) {
         const Index num_values_to_reduce = internal::array_prod(m_reducedDims);
         const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions);
+        if (!data && num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve) {
+          data = static_cast<CoeffReturnType*>(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve));
+          m_result = data;
+        }
         Op reducer(m_reducer);
-        return internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve);
+        return internal::OuterReducer<Self, Op, Device>::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve) || (m_result != NULL);
       }
     }
     return true;
@@ -537,8 +545,8 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
   {
-    if (RunningFullReduction && m_result) {
-      return *m_result;
+    if ((RunningFullReduction || RunningOnGPU) && m_result) {
+      return *(m_result + index);
     }
     Op reducer(m_reducer);
     if (ReducingInnerMostDims || RunningFullReduction) {
@@ -558,7 +566,11 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
   {
     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
-    eigen_assert(index + PacketSize - 1 < internal::array_prod(dimensions()));
+    eigen_assert(index + PacketSize - 1 < Index(internal::array_prod(dimensions())));
+
+    if (RunningOnGPU && m_result) {
+      return internal::pload<PacketReturnType>(m_result + index);
+    }
 
     EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
     if (ReducingInnerMostDims) {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index d3894e625..5e512490c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -336,11 +336,9 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
       (internal::is_same<typename Self::CoeffReturnType, float>::value ||
        (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
-#elif __CUDA_ARCH__ >= 300
+#else
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
       internal::is_same<typename Self::CoeffReturnType, float>::value;
-#else
-  static const bool HasOptimizedImplementation = false;
 #endif
 
   template <typename OutputType>
@@ -619,11 +617,9 @@ struct InnerReducer<Self, Op, GpuDevice> {
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
       (internal::is_same<typename Self::CoeffReturnType, float>::value ||
        (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
-#elif __CUDA_ARCH__ >= 300
+#else
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
       internal::is_same<typename Self::CoeffReturnType, float>::value;
-#else
-  static const bool HasOptimizedImplementation = false;
 #endif
 
   template <typename OutputType>
@@ -678,12 +674,8 @@ struct OuterReducer<Self, Op, GpuDevice> {
   // Unfortunately nvidia doesn't support well exotic types such as complex,
   // so reduce the scope of the optimized version of the code to the simple case
   // of floats.
-#if __CUDA_ARCH__ >= 300
   static const bool HasOptimizedImplementation = !Op::IsStateful &&
       internal::is_same<typename Self::CoeffReturnType, float>::value;
-#else
-  static const bool HasOptimizedImplementation = false;
-#endif
 
   template <typename Device, typename OutputType>
   static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
index a61b14ded..8501466ce 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
@@ -94,7 +94,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
 
   enum {
     IsAligned = false,
-    PacketAccess = (internal::packet_traits<Scalar>::size > 1),
+    PacketAccess = (internal::unpacket_traits<PacketReturnType>::size > 1),
     BlockAccess = false,
     Layout = TensorEvaluator<ArgType, Device>::Layout,
     CoordAccess = false,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
index bdcd70fd9..3523e7c94 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
@@ -20,6 +20,7 @@ struct static_val {
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE operator uint64_t() const { return n; }
 
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val() { }
+
   template <typename T>
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static_val(const T& v) {
     eigen_assert(v == n);
@@ -53,7 +54,7 @@ struct TensorUInt128
   template<typename T>
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
   explicit TensorUInt128(const T& x) : high(0), low(x) {
-    eigen_assert((static_cast<typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type>(x) <= static_cast<typename conditional<sizeof(LOW) == 8, uint64_t, uint32_t>::type>(NumTraits<LOW>::highest())));
+    eigen_assert((static_cast<typename conditional<sizeof(T) == 8, uint64_t, uint32_t>::type>(x) <= NumTraits<uint64_t>::highest()));
     eigen_assert(x >= 0);
   }
diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h
index d2204ad5b..399f95cc1 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h
@@ -21,14 +21,14 @@ struct StlThreadEnvironment {
   // destructor must join the thread.
   class EnvThread {
    public:
-    EnvThread(std::function<void()> f) : thr_(f) {}
+    EnvThread(std::function<void()> f) : thr_(std::move(f)) {}
     ~EnvThread() { thr_.join(); }
 
    private:
     std::thread thr_;
   };
 
-  EnvThread* CreateThread(std::function<void()> f) { return new EnvThread(f); }
+  EnvThread* CreateThread(std::function<void()> f) { return new EnvThread(std::move(f)); }
   Task CreateTask(std::function<void()> f) { return Task{std::move(f)}; }
   void ExecuteTask(const Task& t) { t.f(); }
 };
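
Notes on a few of the changes above follow. The sketches are illustrative only, use hypothetical names, and are not code from the Eigen tree.

The TensorContractionCuda.h hunk replaces the macro-proofed (min)(...) calls with numext::mini. A minimal sketch of why a plain function template is the safer spelling in code that is also compiled for the GPU (mini_sketch and SKETCH_HD are assumed names, not Eigen's numext implementation):

#if defined(__CUDACC__)
#define SKETCH_HD __host__ __device__
#else
#define SKETCH_HD
#endif

// A function template is usable from host and device code alike, and a stray
// min() macro (for example from <windows.h>) cannot break the call site.
template <typename T>
SKETCH_HD inline const T& mini_sketch(const T& a, const T& b) {
  return b < a ? b : a;  // same result and tie-breaking as std::min
}

int main() { return mini_sketch(3, 8) == 3 ? 0 : 1; }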
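
The TensorIntDiv.h hunk splits the single count_leading_zeros template, which branched on sizeof(T) at run time, into two overloads selected with enable_if. A minimal sketch of that dispatch pattern, assuming a GCC or Clang toolchain (clz_sketch is a hypothetical name):

#include <cstdint>
#include <type_traits>

// Chosen when T is 4 bytes wide; as the comment added in the diff notes,
// the result is undefined for val == 0 because the underlying builtin is.
template <typename T>
typename std::enable_if<sizeof(T) == 4, int>::type clz_sketch(T val) {
  return __builtin_clz(static_cast<std::uint32_t>(val));
}

// Chosen when T is 8 bytes wide.
template <typename T>
typename std::enable_if<sizeof(T) == 8, int>::type clz_sketch(T val) {
  return __builtin_clzll(static_cast<std::uint64_t>(val));
}

int main() {
  // 1u has 31 leading zeros as a 32-bit value, 1ull has 63 as a 64-bit value.
  return (clz_sketch(1u) == 31 && clz_sketch(1ull) == 63) ? 0 : 1;
}

Splitting by operand width lets each overload pick the matching intrinsic directly instead of relying on the compiler to fold away a run-time sizeof(T) comparison.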
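
The same hunk adds a fallback for 32-bit MSVC builds, where _BitScanReverse64 is unavailable, by combining two 32-bit scans. A portable sketch of that idea; the deliberately naive clz32_sketch stands in for the 32-bit intrinsic and none of this is the Eigen code:

#include <cstdint>

// Naive 32-bit count-leading-zeros, used only to keep the sketch self-contained.
static int clz32_sketch(std::uint32_t v) {
  int n = 0;
  while (n < 32 && !(v & 0x80000000u)) { v <<= 1; ++n; }
  return n;
}

// 64-bit count-leading-zeros built from the two 32-bit halves, mirroring the
// hi == 0 branch added in the diff.
static int clz64_sketch(std::uint64_t v) {
  const std::uint32_t hi = static_cast<std::uint32_t>(v >> 32);
  const std::uint32_t lo = static_cast<std::uint32_t>(v & 0xffffffffu);
  return hi == 0 ? 32 + clz32_sketch(lo) : clz32_sketch(hi);
}

int main() {
  return (clz64_sketch(1ull) == 63 && clz64_sketch(1ull << 40) == 23) ? 0 : 1;
}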