aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen
diff options
context:
space:
mode:
authorGravatar Eugene Zhulenev <ezhulenev@google.com>2018-08-08 16:57:58 -0700
committerGravatar Eugene Zhulenev <ezhulenev@google.com>2018-08-08 16:57:58 -0700
commit1c8b9e10a791cb43b4f730dcb5d7889099cc1c68 (patch)
treea62a2f74c6e2bec8367a01272743260ec7f54cef /unsupported/Eigen
parent1b0373ae10687ecc51ad9a0bfd46aa4ee116ade1 (diff)
parent131ed1191fa5ccbe0265fcfccfc685642c388192 (diff)
Merged with upstream eigen
Diffstat (limited to 'unsupported/Eigen')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorBase.h24
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h2
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h2
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h6
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h23
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h4
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h4
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h3
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h4
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h4
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h4
-rwxr-xr-xunsupported/Eigen/src/AutoDiff/AutoDiffScalar.h5
-rw-r--r--unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h36
13 files changed, 88 insertions, 33 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 97f90f638..ab3731952 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -538,8 +538,8 @@ class TensorBase<Derived, ReadOnlyAccessors>
// Fourier transforms
template <int FFTDataType, int FFTDirection, typename FFT> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>
- fft(const FFT& fft) const {
- return TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>(derived(), fft);
+ fft(const FFT& dims) const {
+ return TensorFFTOp<const FFT, const Derived, FFTDataType, FFTDirection>(derived(), dims);
}
// Scan.
@@ -723,8 +723,8 @@ class TensorBase<Derived, ReadOnlyAccessors>
template <typename Broadcast> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorBroadcastingOp<const Broadcast, const Derived>
- broadcast(const Broadcast& broadcast) const {
- return TensorBroadcastingOp<const Broadcast, const Derived>(derived(), broadcast);
+ broadcast(const Broadcast& bcast) const {
+ return TensorBroadcastingOp<const Broadcast, const Derived>(derived(), bcast);
}
template <typename Axis, typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -832,8 +832,8 @@ class TensorBase<Derived, ReadOnlyAccessors>
}
template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorShufflingOp<const Shuffle, const Derived>
- shuffle(const Shuffle& shuffle) const {
- return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle);
+ shuffle(const Shuffle& shfl) const {
+ return TensorShufflingOp<const Shuffle, const Derived>(derived(), shfl);
}
template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorStridingOp<const Strides, const Derived>
@@ -1030,13 +1030,13 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorShufflingOp<const Shuffle, const Derived>
- shuffle(const Shuffle& shuffle) const {
- return TensorShufflingOp<const Shuffle, const Derived>(derived(), shuffle);
+ shuffle(const Shuffle& shfl) const {
+ return TensorShufflingOp<const Shuffle, const Derived>(derived(), shfl);
}
template <typename Shuffle> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
TensorShufflingOp<const Shuffle, Derived>
- shuffle(const Shuffle& shuffle) {
- return TensorShufflingOp<const Shuffle, Derived>(derived(), shuffle);
+ shuffle(const Shuffle& shfl) {
+ return TensorShufflingOp<const Shuffle, Derived>(derived(), shfl);
}
template <typename Strides> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
@@ -1052,8 +1052,8 @@ class TensorBase : public TensorBase<Derived, ReadOnlyAccessors> {
// Select the device on which to evaluate the expression.
template <typename DeviceType>
- TensorDevice<Derived, DeviceType> device(const DeviceType& device) {
- return TensorDevice<Derived, DeviceType>(device, derived());
+ TensorDevice<Derived, DeviceType> device(const DeviceType& dev) {
+ return TensorDevice<Derived, DeviceType>(dev, derived());
}
protected:
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
index 1db8d6124..877603421 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h
@@ -89,7 +89,7 @@ EIGEN_STRONG_INLINE void MergeResourceRequirements(
// policy if block shapes/sizes conflict).
*block_shape = resources[0].block_shape;
*block_total_size = resources[0].block_total_size;
- for (int i = 1; i < resources.size(); ++i) {
+ for (std::vector<TensorOpResourceRequirements>::size_type i = 1; i < resources.size(); ++i) {
if (resources[i].block_shape == TensorBlockShapeType::kSkewedInnerDims &&
*block_shape != TensorBlockShapeType::kSkewedInnerDims) {
*block_shape = TensorBlockShapeType::kSkewedInnerDims;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index e1649fb47..e604456e8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -274,8 +274,8 @@ struct TensorContractionEvaluatorBase
op.lhsExpression(), op.rhsExpression()), device),
m_rightImpl(choose(Cond<static_cast<int>(Layout) == static_cast<int>(ColMajor)>(),
op.rhsExpression(), op.lhsExpression()), device),
- m_output_kernel(op.outputKernel()),
m_device(device),
+ m_output_kernel(op.outputKernel()),
m_result(NULL) {
EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<LeftArgType, Device>::Layout) ==
static_cast<int>(TensorEvaluator<RightArgType, Device>::Layout)),
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
index 0d3ca966c..a07e32db0 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
@@ -527,8 +527,8 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
Scalar* local = (Scalar*)m_device.allocate(kernel_sz);
typedef TensorEvalToOp<const KernelArgType> EvalTo;
EvalTo evalToTmp(local, m_kernelArg);
- const bool PacketAccess = internal::IsVectorizable<Device, KernelArgType>::value;
- internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
+ const bool Vectorize = internal::IsVectorizable<Device, KernelArgType>::value;
+ internal::TensorExecutor<const EvalTo, Device, Vectorize>::run(evalToTmp, m_device);
m_kernel = local;
m_local_kernel = true;
@@ -786,7 +786,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
};
EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const GpuDevice& device)
- : m_inputImpl(op.inputExpression(), device), m_kernelArg(op.kernelExpression()), m_kernelImpl(op.kernelExpression(), device), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device)
+ : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_indices(op.indices()), m_buf(NULL), m_kernel(NULL), m_local_kernel(false), m_device(device)
{
EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, GpuDevice>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, GpuDevice>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index 5a16ebe50..cc134228a 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -91,18 +91,31 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) {
}
}
+// An abstract interface to a device specific memory allocator.
+class Allocator {
+ public:
+ virtual ~Allocator() {}
+ EIGEN_DEVICE_FUNC virtual void* allocate(size_t num_bytes) const = 0;
+ EIGEN_DEVICE_FUNC virtual void deallocate(void* buffer) const = 0;
+};
// Build a thread pool device on top the an existing pool of threads.
struct ThreadPoolDevice {
// The ownership of the thread pool remains with the caller.
- ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores) : pool_(pool), num_threads_(num_cores) { }
+ ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores, Allocator* allocator = nullptr)
+ : pool_(pool), num_threads_(num_cores), allocator_(allocator) { }
EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
- return internal::aligned_malloc(num_bytes);
+ return allocator_ ? allocator_->allocate(num_bytes)
+ : internal::aligned_malloc(num_bytes);
}
EIGEN_STRONG_INLINE void deallocate(void* buffer) const {
- internal::aligned_free(buffer);
+ if (allocator_) {
+ allocator_->deallocate(buffer);
+ } else {
+ internal::aligned_free(buffer);
+ }
}
EIGEN_STRONG_INLINE void* allocate_temp(size_t num_bytes) const {
@@ -275,9 +288,13 @@ struct ThreadPoolDevice {
// Thread pool accessor.
ThreadPoolInterface* getPool() const { return pool_; }
+ // Allocator accessor.
+ Allocator* allocator() const { return allocator_; }
+
private:
ThreadPoolInterface* pool_;
int num_threads_;
+ Allocator* allocator_;
};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index 8f7a81575..028902fea 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -126,7 +126,7 @@ struct TensorEvaluator
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
- std::vector<internal::TensorOpResourceRequirements>* resources) const {}
+ std::vector<internal::TensorOpResourceRequirements>*) const {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const {
assert(m_data != NULL);
@@ -255,7 +255,7 @@ struct TensorEvaluator<const Derived, Device>
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
- std::vector<internal::TensorOpResourceRequirements>* resources) const {}
+ std::vector<internal::TensorOpResourceRequirements>*) const {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const {
assert(m_data != NULL);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
index a456f308b..2778bf5ec 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
@@ -124,8 +124,8 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
}
typedef TensorEvalToOp< const typename internal::remove_const<ArgType>::type > EvalTo;
EvalTo evalToTmp(m_buffer, m_op);
- const bool PacketAccess = internal::IsVectorizable<Device, const ArgType>::value;
- internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, PacketAccess>::run(evalToTmp, m_device);
+ const bool Vectorize = internal::IsVectorizable<Device, const ArgType>::value;
+ internal::TensorExecutor<const EvalTo, typename internal::remove_const<Device>::type, Vectorize>::run(evalToTmp, m_device);
return true;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index 8ed1796df..0dd524a30 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -21,6 +21,7 @@ namespace Eigen {
template<typename T> struct MakePointer {
typedef T* Type;
typedef T& RefType;
+ typedef T ScalarType;
};
namespace internal{
@@ -97,7 +98,7 @@ template<typename XprType> class TensorForcedEvalOp;
template<typename ExpressionType, typename DeviceType> class TensorDevice;
template<typename Derived, typename Device> struct TensorEvaluator;
-class NoOpOutputKernel;
+struct NoOpOutputKernel;
struct DefaultDevice;
struct ThreadPoolDevice;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
index fbe69aabc..98f125408 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
@@ -61,8 +61,8 @@ class TensorShufflingOp : public TensorBase<TensorShufflingOp<Shuffle, XprType>
typedef typename Eigen::internal::traits<TensorShufflingOp>::StorageKind StorageKind;
typedef typename Eigen::internal::traits<TensorShufflingOp>::Index Index;
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shuffle)
- : m_xpr(expr), m_shuffle(shuffle) {}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shfl)
+ : m_xpr(expr), m_shuffle(shfl) {}
EIGEN_DEVICE_FUNC
const Shuffle& shufflePermutation() const { return m_shuffle; }
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h
index c8b2fad1e..ea53bb04b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h
@@ -273,11 +273,11 @@ struct TensorEvaluator<const TensorTraceOp<Dims, ArgType>, Device>
Dimensions m_dimensions;
TensorEvaluator<ArgType, Device> m_impl;
+ // Initialize the size of the trace dimension
+ Index m_traceDim;
const Device& m_device;
array<bool, NumInputDims> m_reduced;
array<Index, NumReducedDims> m_reducedDims;
- // Initialize the size of the trace dimension
- Index m_traceDim;
array<Index, NumOutputDims> m_outputStrides;
array<Index, NumReducedDims> m_reducedStrides;
array<Index, NumOutputDims> m_preservedStrides;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
index 006b37921..0a394c88d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h
@@ -59,6 +59,7 @@ struct traits<Tensor<Scalar_, NumIndices_, Options_, IndexType_> >
template <typename T> struct MakePointer {
typedef T* Type;
typedef T& RefType;
+ typedef T ScalarType;
};
typedef typename MakePointer<Scalar>::Type PointerType;
@@ -80,6 +81,7 @@ struct traits<TensorFixedSize<Scalar_, Dimensions, Options_, IndexType_> >
template <typename T> struct MakePointer {
typedef T* Type;
typedef T& RefType;
+ typedef T ScalarType;
};
typedef typename MakePointer<Scalar>::Type PointerType;
@@ -105,6 +107,8 @@ struct traits<TensorMap<PlainObjectType, Options_, MakePointer_> >
typedef MakePointer_<T> MakePointerT;
typedef typename MakePointerT::Type Type;
typedef typename MakePointerT::RefType RefType;
+ typedef typename MakePointerT::ScalarType ScalarType;
+
};
typedef typename MakePointer<Scalar>::Type PointerType;
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
index 279fe5cd3..13d959df4 100755
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
@@ -684,10 +684,15 @@ template<typename DerType> struct NumTraits<AutoDiffScalar<DerType> >
}
namespace std {
+
template <typename T>
class numeric_limits<Eigen::AutoDiffScalar<T> >
: public numeric_limits<typename T::Scalar> {};
+template <typename T>
+class numeric_limits<Eigen::AutoDiffScalar<T&> >
+ : public numeric_limits<typename T::Scalar> {};
+
} // namespace std
#endif // EIGEN_AUTODIFF_SCALAR_H
diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
index dbcc9d8ac..5784cbc86 100644
--- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
+++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h
@@ -193,6 +193,8 @@ struct lgamma_impl<float> {
#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
int dummy;
return ::lgammaf_r(x, &dummy);
+#elif defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+ return cl::sycl::lgamma(x);
#else
return ::lgammaf(x);
#endif
@@ -206,6 +208,8 @@ struct lgamma_impl<double> {
#if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__)
int dummy;
return ::lgamma_r(x, &dummy);
+#elif defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+ return cl::sycl::lgamma(x);
#else
return ::lgamma(x);
#endif
@@ -423,13 +427,25 @@ struct erf_retval {
template <>
struct erf_impl<float> {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE float run(float x) { return ::erff(x); }
+ static EIGEN_STRONG_INLINE float run(float x) {
+#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+ return cl::sycl::erf(x);
+#else
+ return ::erff(x);
+#endif
+ }
};
template <>
struct erf_impl<double> {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE double run(double x) { return ::erf(x); }
+ static EIGEN_STRONG_INLINE double run(double x) {
+#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+ return cl::sycl::erf(x);
+#else
+ return ::erf(x);
+#endif
+ }
};
#endif // EIGEN_HAS_C99_MATH
@@ -456,13 +472,25 @@ struct erfc_retval {
template <>
struct erfc_impl<float> {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); }
+ static EIGEN_STRONG_INLINE float run(const float x) {
+#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+ return cl::sycl::erfc(x);
+#else
+ return ::erfcf(x);
+#endif
+ }
};
template <>
struct erfc_impl<double> {
EIGEN_DEVICE_FUNC
- static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); }
+ static EIGEN_STRONG_INLINE double run(const double x) {
+#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__)
+ return cl::sycl::erfc(x);
+#else
+ return ::erfc(x);
+#endif
+ }
};
#endif // EIGEN_HAS_C99_MATH