diff options
author | Eugene Zhulenev <ezhulenev@google.com> | 2018-08-08 16:57:58 -0700 |
---|---|---|
committer | Eugene Zhulenev <ezhulenev@google.com> | 2018-08-08 16:57:58 -0700 |
commit | 1c8b9e10a791cb43b4f730dcb5d7889099cc1c68 (patch) | |
tree | a62a2f74c6e2bec8367a01272743260ec7f54cef /unsupported/Eigen | |
parent | 1b0373ae10687ecc51ad9a0bfd46aa4ee116ade1 (diff) | |
parent | 131ed1191fa5ccbe0265fcfccfc685642c388192 (diff) |
Merged with upstream eigen
Diffstat (limited to 'unsupported/Eigen')
13 files changed, 88 insertions, 33 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 97f90f638..ab3731952 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -538,8 +538,8 @@ class TensorBase<Derived, ReadOnlyAccessors> // Fourier transforms template <int FFTDataType, int FFTDirection, typename FFT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection> - fft(const FFT& fft) const { - return TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>(derived(), fft); + fft(const FFT& dims) const { + return TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>(derived(), dims); } // Scan. @@ -723,8 +723,8 @@ class TensorBase<Derived, ReadOnlyAccessors> template <typename Broadcast> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorBroadcastingOp<const Broadcast, const Derived> - broadcast(const Broadcast& broadcast) const { - return TensorBroadcastingOp<const Broadcast, const Derived>(derived(), broadcast); + broadcast(const Broadcast& bcast) const { + return TensorBroadcastingOp<const Broadcast, const Derived>(derived(), bcast); } template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -832,8 +832,8 @@ class TensorBase<Derived, ReadOnlyAccessors> } template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorShufflingOp<const Shuffle, const Derived> - shuffle(const Shuffle& shuffle) const { - return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle); + shuffle(const Shuffle& shfl) const { + return TensorShufflingOp<const Shuffle, const Derived>(derived(), shfl); } template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorStridingOp<const Strides, const Derived> @@ -1030,13 +1030,13 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> { template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorShufflingOp<const Shuffle, const Derived> - shuffle(const Shuffle& shuffle) const { - return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle); + shuffle(const Shuffle& shfl) const { + return TensorShufflingOp<const Shuffle, const Derived>(derived(), shfl); } template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp<const Shuffle, Derived> - shuffle(const Shuffle& shuffle) { - return TensorShufflingOp<const Shuffle, Derived>(derived(), shuffle); + shuffle(const Shuffle& shfl) { + return TensorShufflingOp<const Shuffle, Derived>(derived(), shfl); } template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -1052,8 +1052,8 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> { // Select the device on which to evaluate the expression. template <typename DeviceType> - TensorDevice<Derived, DeviceType> device(const DeviceType& device) { - return TensorDevice<Derived, DeviceType>(device, derived()); + TensorDevice<Derived, DeviceType> device(const DeviceType& dev) { + return TensorDevice<Derived, DeviceType>(dev, derived()); } protected: diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h index 1db8d6124..877603421 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h @@ -89,7 +89,7 @@ EIGEN_STRONG_INLINE void MergeResourceRequirements( // policy if block shapes/sizes conflict). *block_shape = resources[0].block_shape; *block_total_size = resources[0].block_total_size; - for (int i = 1; i < resources.size(); ++i) { + for (std::vector<TensorOpResourceRequirements>::size_type i = 1; i < resources.size(); ++i) { if (resources[i].block_shape == TensorBlockShapeType::kSkewedInnerDims && *block_shape != TensorBlockShapeType::kSkewedInnerDims) { *block_shape = TensorBlockShapeType::kSkewedInnerDims; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index e1649fb47..e604456e8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -274,8 +274,8 @@ struct TensorContractionEvaluatorBase op.lhsExpression(), op.rhsExpression()), device), m_rightImpl(choose(Cond<static_cast<int>(Layout) == static_cast<int>(ColMajor)>(), op.rhsExpression(), op.lhsExpression()), device), - m_output_kernel(op.outputKernel()), m_device(device), + m_output_kernel(op.outputKernel()), m_result(NULL) { EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout)), diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 0d3ca966c..a07e32db0 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -527,8 +527,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr Scalar* local = (Scalar*)m_device.allocate(kernel_sz); typedef TensorEvalToOp<const KernelArgType> EvalTo; EvalTo evalToTmp(local, m_kernelArg); - const bool PacketAccess = internal::IsVectorizable<Device, KernelArgType>::value; - internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device); + const bool Vectorize = internal::IsVectorizable<Device, KernelArgType>::value; + internal::TensorExecutor<const EvalTo, Device, Vectorize>::run(evalToTmp, m_device); m_kernel = local; m_local_kernel = true; @@ -786,7 +786,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr }; EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const GpuDevice& device) - : m_inputImpl(op.inputExpression(), device), m_kernelArg(op.kernelExpression()), m_kernelImpl(op.kernelExpression(), device), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device) + : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device) { EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, GpuDevice>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, GpuDevice>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index 5a16ebe50..cc134228a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -91,18 +91,31 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) { } } +// An abstract interface to a device specific memory allocator. +class Allocator { + public: + virtual ~Allocator() {} + EIGEN_DEVICE_FUNC virtual void* allocate(size_t num_bytes) const = 0; + EIGEN_DEVICE_FUNC virtual void deallocate(void* buffer) const = 0; +}; // Build a thread pool device on top the an existing pool of threads. struct ThreadPoolDevice { // The ownership of the thread pool remains with the caller. - ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores) : pool_(pool), num_threads_(num_cores) { } + ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores, Allocator* allocator = nullptr) + : pool_(pool), num_threads_(num_cores), allocator_(allocator) { } EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { - return internal::aligned_malloc(num_bytes); + return allocator_ ? allocator_->allocate(num_bytes) + : internal::aligned_malloc(num_bytes); } EIGEN_STRONG_INLINE void deallocate(void* buffer) const { - internal::aligned_free(buffer); + if (allocator_) { + allocator_->deallocate(buffer); + } else { + internal::aligned_free(buffer); + } } EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const { @@ -275,9 +288,13 @@ struct ThreadPoolDevice { // Thread pool accessor. ThreadPoolInterface* getPool() const { return pool_; } + // Allocator accessor. + Allocator* allocator() const { return allocator_; } + private: ThreadPoolInterface* pool_; int num_threads_; + Allocator* allocator_; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index 8f7a81575..028902fea 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -126,7 +126,7 @@ struct TensorEvaluator } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector<internal::TensorOpResourceRequirements>* resources) const {} + std::vector<internal::TensorOpResourceRequirements>*) const {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const { assert(m_data != NULL); @@ -255,7 +255,7 @@ struct TensorEvaluator<const Derived, Device> } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector<internal::TensorOpResourceRequirements>* resources) const {} + std::vector<internal::TensorOpResourceRequirements>*) const {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const { assert(m_data != NULL); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index a456f308b..2778bf5ec 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -124,8 +124,8 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device> } typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo; EvalTo evalToTmp(m_buffer, m_op); - const bool PacketAccess = internal::IsVectorizable<Device, const ArgType>::value; - internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, PacketAccess>::run(evalToTmp, m_device); + const bool Vectorize = internal::IsVectorizable<Device, const ArgType>::value; + internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, Vectorize>::run(evalToTmp, m_device); return true; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index 8ed1796df..0dd524a30 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -21,6 +21,7 @@ namespace Eigen { template<typename T> struct MakePointer { typedef T* Type; typedef T& RefType; + typedef T ScalarType; }; namespace internal{ @@ -97,7 +98,7 @@ template<typename XprType> class TensorForcedEvalOp; template<typename ExpressionType, typename DeviceType> class TensorDevice; template<typename Derived, typename Device> struct TensorEvaluator; -class NoOpOutputKernel; +struct NoOpOutputKernel; struct DefaultDevice; struct ThreadPoolDevice; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h index fbe69aabc..98f125408 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h @@ -61,8 +61,8 @@ class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType> typedef typename Eigen::internal::traits<TensorShufflingOp>::StorageKind StorageKind; typedef typename Eigen::internal::traits<TensorShufflingOp>::Index Index; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shuffle) - : m_xpr(expr), m_shuffle(shuffle) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shfl) + : m_xpr(expr), m_shuffle(shfl) {} EIGEN_DEVICE_FUNC const Shuffle& shufflePermutation() const { return m_shuffle; } diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h index c8b2fad1e..ea53bb04b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h @@ -273,11 +273,11 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device> Dimensions m_dimensions; TensorEvaluator<ArgType, Device> m_impl; + // Initialize the size of the trace dimension + Index m_traceDim; const Device& m_device; array<bool, NumInputDims> m_reduced; array<Index, NumReducedDims> m_reducedDims; - // Initialize the size of the trace dimension - Index m_traceDim; array<Index, NumOutputDims> m_outputStrides; array<Index, NumReducedDims> m_reducedStrides; array<Index, NumOutputDims> m_preservedStrides; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h index 006b37921..0a394c88d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h @@ -59,6 +59,7 @@ struct traits<Tensor<Scalar_, NumIndices_, Options_, IndexType_> > template <typename T> struct MakePointer { typedef T* Type; typedef T& RefType; + typedef T ScalarType; }; typedef typename MakePointer<Scalar>::Type PointerType; @@ -80,6 +81,7 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> > template <typename T> struct MakePointer { typedef T* Type; typedef T& RefType; + typedef T ScalarType; }; typedef typename MakePointer<Scalar>::Type PointerType; @@ -105,6 +107,8 @@ struct traits<TensorMap<PlainObjectType, Options_, MakePointer_> > typedef MakePointer_<T> MakePointerT; typedef typename MakePointerT::Type Type; typedef typename MakePointerT::RefType RefType; + typedef typename MakePointerT::ScalarType ScalarType; + }; typedef typename MakePointer<Scalar>::Type PointerType; diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index 279fe5cd3..13d959df4 100755 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -684,10 +684,15 @@ template<typename DerType> struct NumTraits<AutoDiffScalar<DerType> > } namespace std { + template <typename T> class numeric_limits<Eigen::AutoDiffScalar<T> > : public numeric_limits<typename T::Scalar> {}; +template <typename T> +class numeric_limits<Eigen::AutoDiffScalar<T&> > + : public numeric_limits<typename T::Scalar> {}; + } // namespace std #endif // EIGEN_AUTODIFF_SCALAR_H diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h index dbcc9d8ac..5784cbc86 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h @@ -193,6 +193,8 @@ struct lgamma_impl<float> { #if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__) int dummy; return ::lgammaf_r(x, &dummy); +#elif defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__) + return cl::sycl::lgamma(x); #else return ::lgammaf(x); #endif @@ -206,6 +208,8 @@ struct lgamma_impl<double> { #if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__) int dummy; return ::lgamma_r(x, &dummy); +#elif defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__) + return cl::sycl::lgamma(x); #else return ::lgamma(x); #endif @@ -423,13 +427,25 @@ struct erf_retval { template <> struct erf_impl<float> { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE float run(float x) { return ::erff(x); } + static EIGEN_STRONG_INLINE float run(float x) { +#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__) + return cl::sycl::erf(x); +#else + return ::erff(x); +#endif + } }; template <> struct erf_impl<double> { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double run(double x) { return ::erf(x); } + static EIGEN_STRONG_INLINE double run(double x) { +#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__) + return cl::sycl::erf(x); +#else + return ::erf(x); +#endif + } }; #endif // EIGEN_HAS_C99_MATH @@ -456,13 +472,25 @@ struct erfc_retval { template <> struct erfc_impl<float> { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); } + static EIGEN_STRONG_INLINE float run(const float x) { +#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__) + return cl::sycl::erfc(x); +#else + return ::erfcf(x); +#endif + } }; template <> struct erfc_impl<double> { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); } + static EIGEN_STRONG_INLINE double run(const double x) { +#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__) + return cl::sycl::erfc(x); +#else + return ::erfc(x); +#endif + } }; #endif // EIGEN_HAS_C99_MATH |