path: root/unsupported/Eigen
author     Rasmus Munk Larsen <rmlarsen@google.com>  2016-06-23 15:08:03 -0700
committer  Rasmus Munk Larsen <rmlarsen@google.com>  2016-06-23 15:08:03 -0700
commit     d39df320d29ecc678e019962dfb2bdf64b061197 (patch)
tree       131bed5dfcb1964e2a06473117e5b0ed3d571119 /unsupported/Eigen
parent     f1f2ff8208f82680aabd9e191810d0cd10be9048 (diff)
parent     361dbd246d0b0f0ceff8d6dea6991807cffde821 (diff)
Resolve merge.
Diffstat (limited to 'unsupported/Eigen')
-rw-r--r--  unsupported/Eigen/CXX11/Tensor                                      1
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorBase.h                    90
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h   12
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h              37
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h        10
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h               95
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h                    80
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h      1
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h                86
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h         33
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorIO.h                      66
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h                    37
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h                 2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h                6
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h           60
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorScan.h                    34
-rw-r--r--  unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h     22
-rwxr-xr-x  unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h                   109
-rw-r--r--  unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h     4
19 files changed, 598 insertions, 187 deletions
diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor
index 859147404..79bac2f67 100644
--- a/unsupported/Eigen/CXX11/Tensor
+++ b/unsupported/Eigen/CXX11/Tensor
@@ -80,6 +80,7 @@ typedef unsigned __int64 uint64_t;
#include "src/Tensor/TensorTraits.h"
#include "src/Tensor/TensorUInt128.h"
#include "src/Tensor/TensorIntDiv.h"
+#include "src/Tensor/TensorGlobalFunctions.h"
#include "src/Tensor/TensorBase.h"
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 12f8a1499..73bfac40e 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -204,64 +204,62 @@ class TensorBase<Derived, ReadOnlyAccessors>
}
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_pow_op<Scalar>, const Derived>
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_pow_op<Scalar,Scalar> >, const Derived>
pow(Scalar exponent) const {
- return unaryExpr(internal::scalar_pow_op<Scalar>(exponent));
+ return unaryExpr(internal::bind2nd_op<internal::scalar_pow_op<Scalar,Scalar> >(exponent));
}
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_add_op<Scalar>, const Derived>
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_sum_op<Scalar,Scalar> >, const Derived>
operator+ (Scalar rhs) const {
- return unaryExpr(internal::scalar_add_op<Scalar>(rhs));
+ return unaryExpr(internal::bind2nd_op<internal::scalar_sum_op<Scalar,Scalar> >(rhs));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE friend
- const TensorCwiseUnaryOp<internal::scalar_add_op<Scalar>, const Derived>
+ const TensorCwiseUnaryOp<internal::bind1st_op<internal::scalar_sum_op<Scalar> >, const Derived>
operator+ (Scalar lhs, const Derived& rhs) {
- return rhs + lhs;
+ return rhs.unaryExpr(internal::bind1st_op<internal::scalar_sum_op<Scalar> >(lhs));
}
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_sub_op<Scalar>, const Derived>
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_difference_op<Scalar,Scalar> >, const Derived>
operator- (Scalar rhs) const {
EIGEN_STATIC_ASSERT((NumTraits<Scalar>::IsSigned || internal::is_same<Scalar, const std::complex<float> >::value), YOU_MADE_A_PROGRAMMING_MISTAKE);
- return unaryExpr(internal::scalar_sub_op<Scalar>(rhs));
+ return unaryExpr(internal::bind2nd_op<internal::scalar_difference_op<Scalar,Scalar> >(rhs));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE friend
- const TensorCwiseUnaryOp<internal::scalar_add_op<Scalar>,
- const TensorCwiseUnaryOp<internal::scalar_opposite_op<Scalar>, const Derived> >
+ const TensorCwiseUnaryOp<internal::bind1st_op<internal::scalar_difference_op<Scalar> >, const Derived>
operator- (Scalar lhs, const Derived& rhs) {
- return -rhs + lhs;
+ return rhs.unaryExpr(internal::bind1st_op<internal::scalar_difference_op<Scalar> >(lhs));
}
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Derived>
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_product_op<Scalar,Scalar> >, const Derived>
operator* (Scalar rhs) const {
- return unaryExpr(internal::scalar_multiple_op<Scalar>(rhs));
+ return unaryExpr(internal::bind2nd_op<internal::scalar_product_op<Scalar,Scalar> >(rhs));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE friend
- const TensorCwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const Derived>
+ const TensorCwiseUnaryOp<internal::bind1st_op<internal::scalar_product_op<Scalar> >, const Derived>
operator* (Scalar lhs, const Derived& rhs) {
- return rhs * lhs;
+ return rhs.unaryExpr(internal::bind1st_op<internal::scalar_product_op<Scalar> >(lhs));
}
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_quotient1_op<Scalar>, const Derived>
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::bind2nd_op<internal::scalar_quotient_op<Scalar,Scalar> >, const Derived>
operator/ (Scalar rhs) const {
- return unaryExpr(internal::scalar_quotient1_op<Scalar>(rhs));
+ return unaryExpr(internal::bind2nd_op<internal::scalar_quotient_op<Scalar,Scalar> >(rhs));
}
EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE friend
- const TensorCwiseUnaryOp<internal::scalar_multiple_op<Scalar>,
- const TensorCwiseUnaryOp<internal::scalar_inverse_op<Scalar>, const Derived> >
+ const TensorCwiseUnaryOp<internal::bind1st_op<internal::scalar_quotient_op<Scalar> >, const Derived>
operator/ (Scalar lhs, const Derived& rhs) {
- return rhs.inverse() * lhs;
+ return rhs.unaryExpr(internal::bind1st_op<internal::scalar_quotient_op<Scalar> >(lhs));
}
EIGEN_DEVICE_FUNC
@@ -307,7 +305,6 @@ class TensorBase<Derived, ReadOnlyAccessors>
return unaryExpr(internal::scalar_floor_op<Scalar>());
}
-
// Generic binary operation support.
template <typename CustomBinaryOp, typename OtherDerived> EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<CustomBinaryOp, const Derived, const OtherDerived>
@@ -372,66 +369,66 @@ class TensorBase<Derived, ReadOnlyAccessors>
// Comparisons and tests.
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LT>, const Derived, const OtherDerived>
+ const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LT>, const Derived, const OtherDerived>
operator<(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_LT>());
+ return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LT>());
}
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LE>, const Derived, const OtherDerived>
+ const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LE>, const Derived, const OtherDerived>
operator<=(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_LE>());
+ return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LE>());
}
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GT>, const Derived, const OtherDerived>
+ const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GT>, const Derived, const OtherDerived>
operator>(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_GT>());
+ return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GT>());
}
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GE>, const Derived, const OtherDerived>
+ const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GE>, const Derived, const OtherDerived>
operator>=(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_GE>());
+ return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GE>());
}
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_EQ>, const Derived, const OtherDerived>
+ const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_EQ>, const Derived, const OtherDerived>
operator==(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_EQ>());
+ return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_EQ>());
}
template<typename OtherDerived> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>, const Derived, const OtherDerived>
+ const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_NEQ>, const Derived, const OtherDerived>
operator!=(const OtherDerived& other) const {
- return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>());
+ return binaryExpr(other.derived(), internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_NEQ>());
}
// comparisons and tests for Scalars
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
+ EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
operator<(Scalar threshold) const {
return operator<(constant(threshold));
}
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_LE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
+ EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_LE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
operator<=(Scalar threshold) const {
return operator<=(constant(threshold));
}
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
+ EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GT>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
operator>(Scalar threshold) const {
return operator>(constant(threshold));
}
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_GE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
+ EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_GE>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
operator>=(Scalar threshold) const {
return operator>=(constant(threshold));
}
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_EQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
+ EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_EQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
operator==(Scalar threshold) const {
return operator==(constant(threshold));
}
EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, internal::cmp_NEQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
+ EIGEN_STRONG_INLINE const TensorCwiseBinaryOp<internal::scalar_cmp_op<Scalar, Scalar, internal::cmp_NEQ>, const Derived, const TensorCwiseNullaryOp<internal::scalar_constant_op<Scalar>, const Derived> >
operator!=(Scalar threshold) const {
return operator!=(constant(threshold));
}
@@ -487,15 +484,22 @@ class TensorBase<Derived, ReadOnlyAccessors>
typedef TensorScanOp<internal::SumReducer<CoeffReturnType>, const Derived> TensorScanSumOp;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorScanSumOp
- cumsum(const Index& axis) const {
- return TensorScanSumOp(derived(), axis);
+ cumsum(const Index& axis, bool exclusive = false) const {
+ return TensorScanSumOp(derived(), axis, exclusive);
}
typedef TensorScanOp<internal::ProdReducer<CoeffReturnType>, const Derived> TensorScanProdOp;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const TensorScanProdOp
- cumprod(const Index& axis) const {
- return TensorScanProdOp(derived(), axis);
+ cumprod(const Index& axis, bool exclusive = false) const {
+ return TensorScanProdOp(derived(), axis, exclusive);
+ }
+
+ template <typename Reducer>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ const TensorScanOp<Reducer, const Derived>
+ scan(const Index& axis, const Reducer& reducer, bool exclusive = false) const {
+ return TensorScanOp<Reducer, const Derived>(derived(), axis, exclusive, reducer);
}
// Reductions.
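
For illustration, a minimal sketch (not part of this patch) of the extended scan API: cumsum/cumprod gain an exclusive flag, and the new scan() entry point accepts an arbitrary reducer. The tensor values and the MaxReducer choice are assumptions made for the example.

#include <unsupported/Eigen/CXX11/Tensor>

void scan_example() {
  Eigen::Tensor<float, 2> t(2, 3);
  t.setValues({{1, 2, 3}, {4, 5, 6}});

  // Inclusive cumulative sum along axis 1 (unchanged default behavior).
  Eigen::Tensor<float, 2> inclusive = t.cumsum(1);

  // New exclusive variant: each output omits the current element.
  Eigen::Tensor<float, 2> exclusive = t.cumsum(1, /*exclusive=*/true);

  // New generic entry point: a running maximum via an explicit reducer.
  Eigen::Tensor<float, 2> running_max =
      t.scan(1, Eigen::internal::MaxReducer<float>(), /*exclusive=*/false);
}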
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
index a60a17049..ee16cde9b 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
@@ -202,7 +202,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
// across k dimension.
const TensorOpCost cost =
contractionCost(m, n, bm, bn, bk, shard_by_col, false);
- Index num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
+ int num_threads = TensorCostModel<ThreadPoolDevice>::numThreads(
static_cast<double>(n) * m, cost, this->m_device.numThreads());
// TODO(dvyukov): this is a stop-gap to prevent regressions while the cost
@@ -301,7 +301,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
class Context {
public:
Context(const Device& device, int num_threads, LhsMapper& lhs,
- RhsMapper& rhs, Scalar* buffer, Index m, Index n, Index k, Index bm,
+ RhsMapper& rhs, Scalar* buffer, Index tm, Index tn, Index tk, Index bm,
Index bn, Index bk, Index nm, Index nn, Index nk, Index gm,
Index gn, Index nm0, Index nn0, bool shard_by_col,
bool parallel_pack)
@@ -309,13 +309,13 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
lhs_(lhs),
rhs_(rhs),
buffer_(buffer),
- output_(buffer, m),
+ output_(buffer, tm),
num_threads_(num_threads),
shard_by_col_(shard_by_col),
parallel_pack_(parallel_pack),
- m_(m),
- n_(n),
- k_(k),
+ m_(tm),
+ n_(tn),
+ k_(tk),
bm_(bm),
bn_(bn),
bk_(bk),
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
index 6c12b2ed8..1468caa23 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
@@ -12,6 +12,8 @@
namespace Eigen {
+static const int kCudaScratchSize = 1024;
+
// This defines an interface that GPUDevice can take to use
// CUDA streams underneath.
class StreamInterface {
@@ -27,6 +29,12 @@ class StreamInterface {
// Return a scratchpad buffer of size 1k
virtual void* scratchpad() const = 0;
+
+  // Return a semaphore. The semaphore is initially set to 0, and each kernel
+  // using it is responsible for resetting it to 0 upon completion, to
+  // maintain the invariant that the semaphore always equals 0 at the start
+  // of each kernel.
+ virtual unsigned int* semaphore() const = 0;
};
static cudaDeviceProp* m_deviceProperties;
@@ -65,12 +73,12 @@ static const cudaStream_t default_stream = cudaStreamDefault;
class CudaStreamDevice : public StreamInterface {
public:
// Use the default stream on the current device
- CudaStreamDevice() : stream_(&default_stream), scratch_(NULL) {
+ CudaStreamDevice() : stream_(&default_stream), scratch_(NULL), semaphore_(NULL) {
cudaGetDevice(&device_);
initializeDeviceProp();
}
// Use the default stream on the specified device
- CudaStreamDevice(int device) : stream_(&default_stream), device_(device), scratch_(NULL) {
+ CudaStreamDevice(int device) : stream_(&default_stream), device_(device), scratch_(NULL), semaphore_(NULL) {
initializeDeviceProp();
}
// Use the specified stream. Note that it's the
@@ -78,7 +86,7 @@ class CudaStreamDevice : public StreamInterface {
// the specified device. If no device is specified the code
// assumes that the stream is associated to the current gpu device.
CudaStreamDevice(const cudaStream_t* stream, int device = -1)
- : stream_(stream), device_(device), scratch_(NULL) {
+ : stream_(stream), device_(device), scratch_(NULL), semaphore_(NULL) {
if (device < 0) {
cudaGetDevice(&device_);
} else {
@@ -123,15 +131,27 @@ class CudaStreamDevice : public StreamInterface {
virtual void* scratchpad() const {
if (scratch_ == NULL) {
- scratch_ = allocate(1024);
+ scratch_ = allocate(kCudaScratchSize + sizeof(unsigned int));
}
return scratch_;
}
+ virtual unsigned int* semaphore() const {
+ if (semaphore_ == NULL) {
+ char* scratch = static_cast<char*>(scratchpad()) + kCudaScratchSize;
+ semaphore_ = reinterpret_cast<unsigned int*>(scratch);
+ cudaError_t err = cudaMemsetAsync(semaphore_, 0, sizeof(unsigned int), *stream_);
+ EIGEN_UNUSED_VARIABLE(err)
+ assert(err == cudaSuccess);
+ }
+ return semaphore_;
+ }
+
private:
const cudaStream_t* stream_;
int device_;
mutable void* scratch_;
+ mutable unsigned int* semaphore_;
};
struct GpuDevice {
@@ -174,6 +194,15 @@ struct GpuDevice {
#endif
}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE unsigned int* semaphore() const {
+#ifndef __CUDA_ARCH__
+ return stream_->semaphore();
+#else
+ eigen_assert(false && "The default device should be used instead to generate kernel code");
+ return NULL;
+#endif
+ }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const {
#ifndef __CUDA_ARCH__
cudaError_t err = cudaMemcpyAsync(dst, src, n, cudaMemcpyDeviceToDevice,
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
index 9073c611a..0af91fe64 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h
@@ -106,7 +106,7 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) {
// Build a thread pool device on top the an existing pool of threads.
struct ThreadPoolDevice {
// The ownership of the thread pool remains with the caller.
- ThreadPoolDevice(ThreadPoolInterface* pool, size_t num_cores) : pool_(pool), num_threads_(num_cores) { }
+ ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores) : pool_(pool), num_threads_(num_cores) { }
EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const {
return internal::aligned_malloc(num_bytes);
@@ -130,7 +130,7 @@ struct ThreadPoolDevice {
::memset(buffer, c, n);
}
- EIGEN_STRONG_INLINE size_t numThreads() const {
+ EIGEN_STRONG_INLINE int numThreads() const {
return num_threads_;
}
@@ -186,7 +186,7 @@ struct ThreadPoolDevice {
std::function<void(Index, Index)> f) const {
typedef TensorCostModel<ThreadPoolDevice> CostModel;
if (n <= 1 || numThreads() == 1 ||
- CostModel::numThreads(n, cost, numThreads()) == 1) {
+ CostModel::numThreads(n, cost, static_cast<int>(numThreads())) == 1) {
f(0, n);
return;
}
@@ -246,7 +246,7 @@ struct ThreadPoolDevice {
// Recursively divide size into halves until we reach block_size.
// Division code rounds mid to block_size, so we are guaranteed to get
// block_count leaves that do actual computations.
- Barrier barrier(block_count);
+ Barrier barrier(static_cast<unsigned int>(block_count));
std::function<void(Index, Index)> handleRange;
handleRange = [=, &handleRange, &barrier, &f](Index first, Index last) {
if (last - first <= block_size) {
@@ -272,7 +272,7 @@ struct ThreadPoolDevice {
private:
ThreadPoolInterface* pool_;
- size_t num_threads_;
+ int num_threads_;
};
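
For context, a sketch of how a ThreadPoolDevice is constructed and driven through parallelFor with the now int-typed thread count; the pool size and the per-coefficient cost figures here are placeholder assumptions, not part of the patch.

#include <unsupported/Eigen/CXX11/Tensor>
#include <unsupported/Eigen/CXX11/ThreadPool>
#include <vector>

void parallel_fill(std::vector<float>& data) {
  Eigen::NonBlockingThreadPool pool(4);      // 4 worker threads (an assumption)
  Eigen::ThreadPoolDevice device(&pool, 4);  // num_cores is an int after this patch

  // Rough per-coefficient cost: one float load, one float store, one cycle.
  Eigen::TensorOpCost cost(sizeof(float), sizeof(float), 1.0);

  device.parallelFor(static_cast<Eigen::Index>(data.size()), cost,
                     [&data](Eigen::Index first, Eigen::Index last) {
                       for (Eigen::Index i = first; i < last; ++i) data[i] = 1.0f;
                     });
}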
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
index 31b361c83..a48cb1daa 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h
@@ -403,6 +403,101 @@ struct TensorEvaluator<const TensorCwiseBinaryOp<BinaryOp, LeftArgType, RightArg
TensorEvaluator<RightArgType, Device> m_rightImpl;
};
+// -------------------- CwiseTernaryOp --------------------
+
+template<typename TernaryOp, typename Arg1Type, typename Arg2Type, typename Arg3Type, typename Device>
+struct TensorEvaluator<const TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type>, Device>
+{
+ typedef TensorCwiseTernaryOp<TernaryOp, Arg1Type, Arg2Type, Arg3Type> XprType;
+
+ enum {
+ IsAligned = TensorEvaluator<Arg1Type, Device>::IsAligned & TensorEvaluator<Arg2Type, Device>::IsAligned & TensorEvaluator<Arg3Type, Device>::IsAligned,
+ PacketAccess = TensorEvaluator<Arg1Type, Device>::PacketAccess & TensorEvaluator<Arg2Type, Device>::PacketAccess & TensorEvaluator<Arg3Type, Device>::PacketAccess &
+ internal::functor_traits<TernaryOp>::PacketAccess,
+ Layout = TensorEvaluator<Arg1Type, Device>::Layout,
+ CoordAccess = false, // to be implemented
+ RawAccess = false
+ };
+
+ EIGEN_DEVICE_FUNC TensorEvaluator(const XprType& op, const Device& device)
+ : m_functor(op.functor()),
+ m_arg1Impl(op.arg1Expression(), device),
+ m_arg2Impl(op.arg2Expression(), device),
+ m_arg3Impl(op.arg3Expression(), device)
+ {
+ EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<Arg1Type, Device>::Layout) == static_cast<int>(TensorEvaluator<Arg3Type, Device>::Layout) || internal::traits<XprType>::NumDimensions <= 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
+
+ EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
+ typename internal::traits<Arg2Type>::StorageKind>::value),
+ STORAGE_KIND_MUST_MATCH)
+ EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::StorageKind,
+ typename internal::traits<Arg3Type>::StorageKind>::value),
+ STORAGE_KIND_MUST_MATCH)
+ EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
+ typename internal::traits<Arg2Type>::Index>::value),
+ STORAGE_INDEX_MUST_MATCH)
+ EIGEN_STATIC_ASSERT((internal::is_same<typename internal::traits<Arg1Type>::Index,
+ typename internal::traits<Arg3Type>::Index>::value),
+ STORAGE_INDEX_MUST_MATCH)
+
+ eigen_assert(dimensions_match(m_arg1Impl.dimensions(), m_arg2Impl.dimensions()) && dimensions_match(m_arg1Impl.dimensions(), m_arg3Impl.dimensions()));
+ }
+
+ typedef typename XprType::Index Index;
+ typedef typename XprType::Scalar Scalar;
+ typedef typename internal::traits<XprType>::Scalar CoeffReturnType;
+ typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
+ static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ typedef typename TensorEvaluator<Arg1Type, Device>::Dimensions Dimensions;
+
+ EIGEN_DEVICE_FUNC const Dimensions& dimensions() const
+ {
+ // TODO: use arg2 or arg3 dimensions if they are known at compile time.
+ return m_arg1Impl.dimensions();
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
+ m_arg1Impl.evalSubExprsIfNeeded(NULL);
+ m_arg2Impl.evalSubExprsIfNeeded(NULL);
+ m_arg3Impl.evalSubExprsIfNeeded(NULL);
+ return true;
+ }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
+ m_arg1Impl.cleanup();
+ m_arg2Impl.cleanup();
+ m_arg3Impl.cleanup();
+ }
+
+ EIGEN_DEVICE_FUNC CoeffReturnType coeff(Index index) const
+ {
+ return m_functor(m_arg1Impl.coeff(index), m_arg2Impl.coeff(index), m_arg3Impl.coeff(index));
+ }
+ template<int LoadMode>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
+ {
+ return m_functor.packetOp(m_arg1Impl.template packet<LoadMode>(index),
+ m_arg2Impl.template packet<LoadMode>(index),
+ m_arg3Impl.template packet<LoadMode>(index));
+ }
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
+ costPerCoeff(bool vectorized) const {
+ const double functor_cost = internal::functor_traits<TernaryOp>::Cost;
+ return m_arg1Impl.costPerCoeff(vectorized) +
+ m_arg2Impl.costPerCoeff(vectorized) +
+ m_arg3Impl.costPerCoeff(vectorized) +
+ TensorOpCost(0, 0, functor_cost, vectorized, PacketSize);
+ }
+
+ EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; }
+
+ private:
+ const TernaryOp m_functor;
+ TensorEvaluator<Arg1Type, Device> m_arg1Impl;
+ TensorEvaluator<Arg2Type, Device> m_arg2Impl;
+ TensorEvaluator<Arg3Type, Device> m_arg3Impl;
+};
+
// -------------------- SelectOp --------------------
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
index ea250d8bc..5f2e329f2 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExpr.h
@@ -219,6 +219,86 @@ class TensorCwiseBinaryOp : public TensorBase<TensorCwiseBinaryOp<BinaryOp, LhsX
namespace internal {
+template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType>
+struct traits<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType> >
+{
+ // Type promotion to handle the case where the types of the args are different.
+ typedef typename result_of<
+ TernaryOp(typename Arg1XprType::Scalar,
+ typename Arg2XprType::Scalar,
+ typename Arg3XprType::Scalar)>::type Scalar;
+ typedef traits<Arg1XprType> XprTraits;
+ typedef typename traits<Arg1XprType>::StorageKind StorageKind;
+ typedef typename traits<Arg1XprType>::Index Index;
+ typedef typename Arg1XprType::Nested Arg1Nested;
+ typedef typename Arg2XprType::Nested Arg2Nested;
+ typedef typename Arg3XprType::Nested Arg3Nested;
+ typedef typename remove_reference<Arg1Nested>::type _Arg1Nested;
+ typedef typename remove_reference<Arg2Nested>::type _Arg2Nested;
+ typedef typename remove_reference<Arg3Nested>::type _Arg3Nested;
+ static const int NumDimensions = XprTraits::NumDimensions;
+ static const int Layout = XprTraits::Layout;
+
+ enum {
+ Flags = 0
+ };
+};
+
+template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType>
+struct eval<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType>, Eigen::Dense>
+{
+ typedef const TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType>& type;
+};
+
+template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType>
+struct nested<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType>, 1, typename eval<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType> >::type>
+{
+ typedef TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType> type;
+};
+
+} // end namespace internal
+
+
+
+template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType>
+class TensorCwiseTernaryOp : public TensorBase<TensorCwiseTernaryOp<TernaryOp, Arg1XprType, Arg2XprType, Arg3XprType>, ReadOnlyAccessors>
+{
+ public:
+ typedef typename Eigen::internal::traits<TensorCwiseTernaryOp>::Scalar Scalar;
+ typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
+ typedef Scalar CoeffReturnType;
+ typedef typename Eigen::internal::nested<TensorCwiseTernaryOp>::type Nested;
+ typedef typename Eigen::internal::traits<TensorCwiseTernaryOp>::StorageKind StorageKind;
+ typedef typename Eigen::internal::traits<TensorCwiseTernaryOp>::Index Index;
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorCwiseTernaryOp(const Arg1XprType& arg1, const Arg2XprType& arg2, const Arg3XprType& arg3, const TernaryOp& func = TernaryOp())
+ : m_arg1_xpr(arg1), m_arg2_xpr(arg2), m_arg3_xpr(arg3), m_functor(func) {}
+
+ EIGEN_DEVICE_FUNC
+ const TernaryOp& functor() const { return m_functor; }
+
+ /** \returns the nested expressions */
+ EIGEN_DEVICE_FUNC
+ const typename internal::remove_all<typename Arg1XprType::Nested>::type&
+ arg1Expression() const { return m_arg1_xpr; }
+
+ EIGEN_DEVICE_FUNC
+ const typename internal::remove_all<typename Arg2XprType::Nested>::type&
+ arg2Expression() const { return m_arg2_xpr; }
+
+ EIGEN_DEVICE_FUNC
+ const typename internal::remove_all<typename Arg3XprType::Nested>::type&
+ arg3Expression() const { return m_arg3_xpr; }
+
+ protected:
+ typename Arg1XprType::Nested m_arg1_xpr;
+ typename Arg2XprType::Nested m_arg2_xpr;
+ typename Arg3XprType::Nested m_arg3_xpr;
+ const TernaryOp m_functor;
+};
+
+
+namespace internal {
template<typename IfXprType, typename ThenXprType, typename ElseXprType>
struct traits<TensorSelectOp<IfXprType, ThenXprType, ElseXprType> >
: traits<ThenXprType>
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
index a1a18d938..f35275ffb 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h
@@ -21,6 +21,7 @@ template<typename Derived, int AccessLevel = internal::accessors_level<Derived>:
template<typename NullaryOp, typename PlainObjectType> class TensorCwiseNullaryOp;
template<typename UnaryOp, typename XprType> class TensorCwiseUnaryOp;
template<typename BinaryOp, typename LeftXprType, typename RightXprType> class TensorCwiseBinaryOp;
+template<typename TernaryOp, typename Arg1XprType, typename Arg2XprType, typename Arg3XprType> class TensorCwiseTernaryOp;
template<typename IfXprType, typename ThenXprType, typename ElseXprType> class TensorSelectOp;
template<typename Op, typename Dims, typename XprType> class TensorReductionOp;
template<typename XprType> class TensorIndexTupleOp;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
index 3dd32e9d1..a8e48fced 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFunctors.h
@@ -84,6 +84,14 @@ struct functor_traits<scalar_sigmoid_op<T> > {
};
+template<typename Reducer, typename Device>
+struct reducer_traits {
+ enum {
+ Cost = 1,
+ PacketAccess = false
+ };
+};
+
// Standard reduction functors
template <typename T> struct SumReducer
{
@@ -119,6 +127,15 @@ template <typename T> struct SumReducer
}
};
+template <typename T, typename Device>
+struct reducer_traits<SumReducer<T>, Device> {
+ enum {
+ Cost = NumTraits<T>::AddCost,
+ PacketAccess = PacketType<T, Device>::HasAdd
+ };
+};
+
+
template <typename T> struct MeanReducer
{
static const bool PacketAccess = packet_traits<T>::HasAdd && !NumTraits<T>::IsInteger;
@@ -162,6 +179,15 @@ template <typename T> struct MeanReducer
DenseIndex packetCount_;
};
+template <typename T, typename Device>
+struct reducer_traits<MeanReducer<T>, Device> {
+ enum {
+ Cost = NumTraits<T>::AddCost,
+ PacketAccess = PacketType<T, Device>::HasAdd
+ };
+};
+
+
template <typename T> struct MaxReducer
{
static const bool PacketAccess = packet_traits<T>::HasMax;
@@ -195,6 +221,15 @@ template <typename T> struct MaxReducer
}
};
+template <typename T, typename Device>
+struct reducer_traits<MaxReducer<T>, Device> {
+ enum {
+ Cost = NumTraits<T>::AddCost,
+ PacketAccess = PacketType<T, Device>::HasMax
+ };
+};
+
+
template <typename T> struct MinReducer
{
static const bool PacketAccess = packet_traits<T>::HasMin;
@@ -228,6 +263,14 @@ template <typename T> struct MinReducer
}
};
+template <typename T, typename Device>
+struct reducer_traits<MinReducer<T>, Device> {
+ enum {
+ Cost = NumTraits<T>::AddCost,
+ PacketAccess = PacketType<T, Device>::HasMin
+ };
+};
+
template <typename T> struct ProdReducer
{
@@ -263,6 +306,14 @@ template <typename T> struct ProdReducer
}
};
+template <typename T, typename Device>
+struct reducer_traits<ProdReducer<T>, Device> {
+ enum {
+ Cost = NumTraits<T>::MulCost,
+ PacketAccess = PacketType<T, Device>::HasMul
+ };
+};
+
struct AndReducer
{
@@ -280,6 +331,15 @@ struct AndReducer
}
};
+template <typename Device>
+struct reducer_traits<AndReducer, Device> {
+ enum {
+ Cost = 1,
+ PacketAccess = false
+ };
+};
+
+
struct OrReducer {
static const bool PacketAccess = false;
static const bool IsStateful = false;
@@ -295,6 +355,15 @@ struct OrReducer {
}
};
+template <typename Device>
+struct reducer_traits<OrReducer, Device> {
+ enum {
+ Cost = 1,
+ PacketAccess = false
+ };
+};
+
+
// Argmin/Argmax reducers
template <typename T> struct ArgMaxTupleReducer
{
@@ -312,6 +381,15 @@ template <typename T> struct ArgMaxTupleReducer
}
};
+template <typename T, typename Device>
+struct reducer_traits<ArgMaxTupleReducer<T>, Device> {
+ enum {
+ Cost = NumTraits<T>::AddCost,
+ PacketAccess = false
+ };
+};
+
+
template <typename T> struct ArgMinTupleReducer
{
static const bool PacketAccess = false;
@@ -328,6 +406,14 @@ template <typename T> struct ArgMinTupleReducer
}
};
+template <typename T, typename Device>
+struct reducer_traits<ArgMinTupleReducer<T>, Device> {
+ enum {
+ Cost = NumTraits<T>::AddCost,
+ PacketAccess = false
+ };
+};
+
// Random number generation
namespace {
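
These reducer_traits specializations give device-aware code a uniform way to query a reducer's cost and vectorizability, replacing direct use of Op::PacketAccess (see the reduction launchers in TensorReductionCuda.h below). A compile-time query sketch, with the reducer and device chosen only for the example; it assumes <unsupported/Eigen/CXX11/Tensor> is included:

typedef Eigen::internal::SumReducer<float> Reducer;
// Can this reducer be vectorized on this device?
static const bool kCanVectorize =
    Eigen::internal::reducer_traits<Reducer, Eigen::DefaultDevice>::PacketAccess != 0;
// Estimated cost per reduced coefficient, used by the cost model.
static const int kCostPerCoeff =
    Eigen::internal::reducer_traits<Reducer, Eigen::DefaultDevice>::Cost;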
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h b/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h
new file mode 100644
index 000000000..665b861cf
--- /dev/null
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorGlobalFunctions.h
@@ -0,0 +1,33 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Eugene Brevdo <ebrevdo@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H
+#define EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H
+
+namespace Eigen {
+
+/** \cpp11 \returns an expression of the coefficient-wise betainc(\a x, \a a, \a b) applied to the given tensors.
+ *
+ * This function computes the regularized incomplete beta function (integral).
+ *
+ */
+template <typename ADerived, typename BDerived, typename XDerived>
+EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const
+ TensorCwiseTernaryOp<internal::scalar_betainc_op<typename XDerived::Scalar>,
+ const ADerived, const BDerived, const XDerived>
+ betainc(const ADerived& a, const BDerived& b, const XDerived& x) {
+ return TensorCwiseTernaryOp<
+ internal::scalar_betainc_op<typename XDerived::Scalar>, const ADerived,
+ const BDerived, const XDerived>(
+ a, b, x, internal::scalar_betainc_op<typename XDerived::Scalar>());
+}
+
+} // end namespace Eigen
+
+#endif // EIGEN_CXX11_TENSOR_TENSOR_GLOBAL_FUNCTIONS_H
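
A usage sketch for the new global function, which builds the TensorCwiseTernaryOp introduced in TensorExpr.h above; the input values are illustrative only. Coefficient-wise, the result is the regularized incomplete beta function I_x(a, b).

#include <unsupported/Eigen/CXX11/Tensor>

void betainc_example() {
  Eigen::Tensor<float, 1> a(3), b(3), x(3);
  a.setValues({0.5f, 2.0f, 8.0f});
  b.setValues({0.5f, 5.0f, 2.0f});
  x.setValues({0.25f, 0.5f, 0.75f});

  // result(i) = I_{x(i)}(a(i), b(i)), evaluated coefficient-wise.
  Eigen::Tensor<float, 1> result = Eigen::betainc(a, b, x);
}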
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
index 38a833f82..f3a3a1b88 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorIO.h
@@ -17,34 +17,62 @@ template<>
struct significant_decimals_impl<std::string>
: significant_decimals_default_impl<std::string, true>
{};
-}
+// Print the tensor as a 2d matrix
+template <typename Tensor, int Rank>
+struct TensorPrinter {
+ static void run (std::ostream& os, const Tensor& tensor) {
+ typedef typename internal::remove_const<typename Tensor::Scalar>::type Scalar;
+ typedef typename Tensor::Index Index;
+ const Index total_size = internal::array_prod(tensor.dimensions());
+ if (total_size > 0) {
+ const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions());
+ static const int layout = Tensor::Layout;
+ Map<const Array<Scalar, Dynamic, Dynamic, layout> > matrix(const_cast<Scalar*>(tensor.data()), first_dim, total_size/first_dim);
+ os << matrix;
+ }
+ }
+};
+
+
+// Print the tensor as a vector
+template <typename Tensor>
+struct TensorPrinter<Tensor, 1> {
+ static void run (std::ostream& os, const Tensor& tensor) {
+ typedef typename internal::remove_const<typename Tensor::Scalar>::type Scalar;
+ typedef typename Tensor::Index Index;
+ const Index total_size = internal::array_prod(tensor.dimensions());
+ if (total_size > 0) {
+ Map<const Array<Scalar, Dynamic, 1> > array(const_cast<Scalar*>(tensor.data()), total_size);
+ os << array;
+ }
+ }
+};
+
+
+// Print the tensor as a scalar
+template <typename Tensor>
+struct TensorPrinter<Tensor, 0> {
+ static void run (std::ostream& os, const Tensor& tensor) {
+ os << tensor.coeff(0);
+ }
+};
+}
+
template <typename T>
std::ostream& operator << (std::ostream& os, const TensorBase<T, ReadOnlyAccessors>& expr) {
+ typedef TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice> Evaluator;
+ typedef typename Evaluator::Dimensions Dimensions;
+
// Evaluate the expression if needed
TensorForcedEvalOp<const T> eval = expr.eval();
- TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice> tensor(eval, DefaultDevice());
+ Evaluator tensor(eval, DefaultDevice());
tensor.evalSubExprsIfNeeded(NULL);
- typedef typename internal::remove_const<typename T::Scalar>::type Scalar;
- typedef typename T::Index Index;
- typedef typename TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice>::Dimensions Dimensions;
- const Index total_size = internal::array_prod(tensor.dimensions());
-
- // Print the tensor as a 1d vector or a 2d matrix.
+ // Print the result
static const int rank = internal::array_size<Dimensions>::value;
- if (rank == 0) {
- os << tensor.coeff(0);
- } else if (rank == 1) {
- Map<const Array<Scalar, Dynamic, 1> > array(const_cast<Scalar*>(tensor.data()), total_size);
- os << array;
- } else {
- const Index first_dim = Eigen::internal::array_get<0>(tensor.dimensions());
- static const int layout = TensorEvaluator<const TensorForcedEvalOp<const T>, DefaultDevice>::Layout;
- Map<const Array<Scalar, Dynamic, Dynamic, layout> > matrix(const_cast<Scalar*>(tensor.data()), first_dim, total_size/first_dim);
- os << matrix;
- }
+ internal::TensorPrinter<Evaluator, rank>::run(os, tensor);
// Cleanup.
tensor.cleanup();
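
The refactoring moves the rank dispatch into internal::TensorPrinter without changing observable output; a usage sketch, with the tensor contents assumed:

#include <iostream>
#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  Eigen::Tensor<float, 2> t(2, 3);
  t.setValues({{1, 2, 3}, {4, 5, 6}});
  std::cout << t << "\n";        // rank >= 2: printed as a 2x3 matrix
  std::cout << t.sum() << "\n";  // rank 0: printed as a single scalar
}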
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
index b1645d56f..fdb5ee6b8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMeta.h
@@ -47,22 +47,39 @@ template <> struct max_n_1<0> {
// Default packet types
template <typename Scalar, typename Device>
-struct PacketType {
+struct PacketType : internal::packet_traits<Scalar> {
typedef typename internal::packet_traits<Scalar>::type type;
- enum { size = internal::unpacket_traits<type>::size };
};
// For CUDA packet types when using a GpuDevice
-#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
+#if defined(EIGEN_USE_GPU) && defined(__CUDACC__) && defined(EIGEN_HAS_CUDA_FP16)
template <>
-struct PacketType<float, GpuDevice> {
- typedef float4 type;
- static const int size = 4;
-};
-template <>
-struct PacketType<double, GpuDevice> {
- typedef double2 type;
+struct PacketType<half, GpuDevice> {
+ typedef half2 type;
static const int size = 2;
+ enum {
+ HasAdd = 1,
+ HasSub = 1,
+ HasMul = 1,
+ HasNegate = 1,
+ HasAbs = 1,
+ HasArg = 0,
+ HasAbs2 = 0,
+ HasMin = 1,
+ HasMax = 1,
+ HasConj = 0,
+ HasSetLinear = 0,
+ HasBlend = 0,
+
+ HasDiv = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasExp = 1,
+ HasLog = 1,
+ HasLog1p = 0,
+ HasLog10 = 0,
+ HasPow = 1,
+ };
};
#endif
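
Since PacketType now inherits internal::packet_traits, the capability flags (HasAdd, HasDiv, ...) can be queried uniformly for any (Scalar, Device) pair instead of just the packet type and size; a sketch, assuming <unsupported/Eigen/CXX11/Tensor> is included:

typedef Eigen::PacketType<float, Eigen::DefaultDevice> FloatPacket;
typedef FloatPacket::type Packet;                     // the underlying SIMD packet type
static const int  kPacketSize  = FloatPacket::size;   // coefficients per packet
static const bool kHasDivision = FloatPacket::HasDiv != 0;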
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index 52cfc2824..d34f1e328 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -148,7 +148,7 @@ struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast<Scalar*>(m_impl.data()); }
- const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
+ EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
protected:
TensorEvaluator<ArgType, Device> m_impl;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 99a09c058..04ba45a8f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -316,7 +316,7 @@ struct OuterReducer {
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
template <int B, int N, typename S, typename R, typename I>
-__global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*);
+__global__ void FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*);
#ifdef EIGEN_HAS_CUDA_FP16
@@ -558,7 +558,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(index + PacketSize - 1 < dimensions().TotalSize());
+ eigen_assert(index + PacketSize - 1 < internal::array_prod(dimensions()));
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
if (ReducingInnerMostDims) {
@@ -616,7 +616,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
template <typename S, typename O, bool V> friend struct internal::FullReducerShard;
#endif
#if defined(EIGEN_USE_GPU) && defined(__CUDACC__)
- template <int B, int N, typename S, typename R, typename I> friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*);
+ template <int B, int N, typename S, typename R, typename I> friend void internal::FullReductionKernel(R, const S, I, typename S::CoeffReturnType*, unsigned int*);
#ifdef EIGEN_HAS_CUDA_FP16
template <typename S, typename R, typename I> friend void internal::ReductionInitFullReduxKernelHalfFloat(R, const S, I, half2*);
template <int B, int N, typename S, typename R, typename I> friend void internal::FullReductionKernelHalfFloat(R, const S, I, half*, half2*);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
index 45087a9a4..d9bbcd858 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h
@@ -112,17 +112,42 @@ __global__ void ReductionInitKernel(const CoeffType val, Index num_preserved_coe
}
}
+
template <int BlockSize, int NumPerThread, typename Self,
typename Reducer, typename Index>
__global__ void FullReductionKernel(Reducer reducer, const Self input, Index num_coeffs,
- typename Self::CoeffReturnType* output) {
+ typename Self::CoeffReturnType* output, unsigned int* semaphore) {
+ // Initialize the output value
const Index first_index = blockIdx.x * BlockSize * NumPerThread + threadIdx.x;
-
- // Initialize the output value if it wasn't initialized by the ReductionInitKernel
- if (gridDim.x == 1 && first_index == 0) {
- *output = reducer.initialize();
- __syncthreads();
+ if (gridDim.x == 1) {
+ if (first_index == 0) {
+ *output = reducer.initialize();
+ }
}
+ else {
+ if (threadIdx.x == 0) {
+ unsigned int block = atomicCAS(semaphore, 0u, 1u);
+ if (block == 0) {
+ // We're the first block to run, initialize the output value
+ atomicExch(output, reducer.initialize());
+ __threadfence();
+ atomicExch(semaphore, 2u);
+ }
+ else {
+ // Wait for the first block to initialize the output value.
+ // Use atomicCAS here to ensure that the reads aren't cached
+ unsigned int val;
+ do {
+ val = atomicCAS(semaphore, 2u, 2u);
+ }
+ while (val < 2u);
+ }
+ }
+ }
+
+ __syncthreads();
+
+ eigen_assert(gridDim.x == 1 || *semaphore >= 2u);
typename Self::CoeffReturnType accum = reducer.initialize();
Index max_iter = numext::mini<Index>(num_coeffs - first_index, NumPerThread*BlockSize);
@@ -141,6 +166,11 @@ __global__ void FullReductionKernel(Reducer reducer, const Self input, Index num
if ((threadIdx.x & (warpSize - 1)) == 0) {
atomicReduce(output, accum, reducer);
}
+
+ if (gridDim.x > 1 && threadIdx.x == 0) {
+ // Let the last block reset the semaphore
+ atomicInc(semaphore, gridDim.x + 1);
+ }
}
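
The handshake above encodes three semaphore states: 0 (untouched), 1 (the winning block is initializing the output), and 2 (output visible to all blocks). The closing atomicInc wraps the counter back to 0 once all gridDim.x blocks have passed, which is exactly the invariant documented on StreamInterface::semaphore(). A host-side sketch of the same protocol using std::atomic, purely illustrative:

#include <atomic>

// One of N participants; 'sem' starts at 0 and is 0 again when all N finish.
void reduce_block(std::atomic<unsigned>& sem, unsigned N, std::atomic<float>& output) {
  unsigned expected = 0;
  if (sem.compare_exchange_strong(expected, 1u)) {
    output.store(0.0f);          // we won the race: initialize the output
    sem.store(2u);               // publish "initialized"
  } else {
    while (sem.load() < 2u) { }  // spin until initialization is visible
  }
  // ... accumulate this participant's partial result into 'output' ...
  unsigned arrived = sem.fetch_add(1u) + 1u;  // mirrors atomicInc(semaphore, N + 1)
  if (arrived == N + 2u) sem.store(0u);       // last arrival resets the semaphore
}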
@@ -246,15 +276,13 @@ struct FullReductionLauncher<Self, Op, float, PacketAccess> {
const int num_per_thread = 128;
const int num_blocks = divup<int>(num_coeffs, block_size * num_per_thread);
+ unsigned int* semaphore = NULL;
if (num_blocks > 1) {
- // We initialize the outputs outside the reduction kernel when we can't be sure that there
- // won't be a race conditions between multiple thread blocks.
- LAUNCH_CUDA_KERNEL((ReductionInitKernel<Scalar, Index>),
- 1, 32, 0, device, reducer.initialize(), 1, output);
+ semaphore = device.semaphore();
}
LAUNCH_CUDA_KERNEL((FullReductionKernel<block_size, num_per_thread, Self, Op, Index>),
- num_blocks, block_size, 0, device, reducer, self, num_coeffs, output);
+ num_blocks, block_size, 0, device, reducer, self, num_coeffs, output, semaphore);
}
};
@@ -300,10 +328,10 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
// Unfortunately nvidia doesn't support well exotic types such as complex,
// so reduce the scope of the optimized version of the code to the simple case
// of floats and half floats.
- #ifdef EIGEN_HAS_CUDA_FP16
+#ifdef EIGEN_HAS_CUDA_FP16
static const bool HasOptimizedImplementation = !Op::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
- (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess));
+ (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
#else
static const bool HasOptimizedImplementation = !Op::IsStateful &&
internal::is_same<typename Self::CoeffReturnType, float>::value;
@@ -318,7 +346,7 @@ struct FullReducer<Self, Op, GpuDevice, Vectorizable> {
return;
}
- FullReductionLauncher<Self, Op, OutputType, Op::PacketAccess>::run(self, reducer, device, output, num_coeffs);
+ FullReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs);
}
};
@@ -580,7 +608,7 @@ struct InnerReducer<Self, Op, GpuDevice> {
#ifdef EIGEN_HAS_CUDA_FP16
static const bool HasOptimizedImplementation = !Op::IsStateful &&
(internal::is_same<typename Self::CoeffReturnType, float>::value ||
- (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && Op::PacketAccess));
+ (internal::is_same<typename Self::CoeffReturnType, Eigen::half>::value && reducer_traits<Op, GpuDevice>::PacketAccess));
#else
static const bool HasOptimizedImplementation = !Op::IsStateful &&
internal::is_same<typename Self::CoeffReturnType, float>::value;
@@ -599,7 +627,7 @@ struct InnerReducer<Self, Op, GpuDevice> {
return true;
}
- return InnerReductionLauncher<Self, Op, OutputType, Op::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
+ return InnerReductionLauncher<Self, Op, OutputType, reducer_traits<Op, GpuDevice>::PacketAccess>::run(self, reducer, device, output, num_coeffs_to_reduce, num_preserved_vals);
}
};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
index 031dbf6f2..1aa196b84 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorScan.h
@@ -57,8 +57,8 @@ public:
typedef typename Eigen::internal::traits<TensorScanOp>::Index Index;
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorScanOp(
- const XprType& expr, const Index& axis, const Op& op = Op())
- : m_expr(expr), m_axis(axis), m_accumulator(op) {}
+ const XprType& expr, const Index& axis, bool exclusive = false, const Op& op = Op())
+ : m_expr(expr), m_axis(axis), m_accumulator(op), m_exclusive(exclusive) {}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Index axis() const { return m_axis; }
@@ -66,11 +66,14 @@ public:
const XprType& expression() const { return m_expr; }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
const Op accumulator() const { return m_accumulator; }
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
+ bool exclusive() const { return m_exclusive; }
protected:
typename XprType::Nested m_expr;
const Index m_axis;
const Op m_accumulator;
+ const bool m_exclusive;
};
// Eval as rvalue
@@ -81,7 +84,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
typedef typename XprType::Index Index;
static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
typedef DSizes<Index, NumDims> Dimensions;
- typedef typename XprType::Scalar Scalar;
+ typedef typename internal::remove_const<typename XprType::Scalar>::type Scalar;
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
@@ -99,6 +102,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
: m_impl(op.expression(), device),
m_device(device),
m_axis(op.axis()),
+ m_exclusive(op.exclusive()),
m_accumulator(op.accumulator()),
m_dimensions(m_impl.dimensions()),
m_size(m_dimensions[m_axis]),
@@ -106,7 +110,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
m_output(NULL) {
// Accumulating a scalar isn't supported.
- EIGEN_STATIC_ASSERT(NumDims > 0, YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
eigen_assert(m_axis >= 0 && m_axis < NumDims);
// Compute stride of scan axis
@@ -122,7 +126,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
- return m_dimensions;
+ return m_dimensions;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
@@ -136,7 +140,7 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
return true;
}
}
-
+
template<int LoadMode>
EIGEN_DEVICE_FUNC PacketReturnType packet(Index index) const {
return internal::ploadt<PacketReturnType, LoadMode>(m_output + index);
@@ -152,6 +156,10 @@ struct TensorEvaluator<const TensorScanOp<Op, ArgType>, Device> {
return m_output[index];
}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool) const {
+ return TensorOpCost(sizeof(CoeffReturnType), 0, 0);
+ }
+
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
if (m_output != NULL) {
m_device.deallocate(m_output);
@@ -164,6 +172,7 @@ protected:
TensorEvaluator<ArgType, Device> m_impl;
const Device& m_device;
const Index m_axis;
+ const bool m_exclusive;
Op m_accumulator;
const Dimensions& m_dimensions;
const Index& m_size;
@@ -172,7 +181,7 @@ protected:
// TODO(ibab) Parallelize this single-threaded implementation if desired
EIGEN_DEVICE_FUNC void accumulateTo(Scalar* data) {
- // We fix the index along the scan axis to 0 and perform an
+ // We fix the index along the scan axis to 0 and perform a
// scan per remaining entry. The iteration is split into two nested
// loops to avoid an integer division by keeping track of each idx1 and idx2.
for (Index idx1 = 0; idx1 < dimensions().TotalSize() / m_size; idx1 += m_stride) {
@@ -180,12 +189,17 @@ protected:
// Calculate the starting offset for the scan
Index offset = idx1 * m_size + idx2;
- // Compute the prefix sum along the axis, starting at the calculated offset
+ // Compute the scan along the axis, starting at the calculated offset
CoeffReturnType accum = m_accumulator.initialize();
for (Index idx3 = 0; idx3 < m_size; idx3++) {
Index curr = offset + idx3 * m_stride;
- m_accumulator.reduce(m_impl.coeff(curr), &accum);
- data[curr] = m_accumulator.finalize(accum);
+ if (m_exclusive) {
+ data[curr] = m_accumulator.finalize(accum);
+ m_accumulator.reduce(m_impl.coeff(curr), &accum);
+ } else {
+ m_accumulator.reduce(m_impl.coeff(curr), &accum);
+ data[curr] = m_accumulator.finalize(accum);
+ }
}
}
}
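
The branch above is the entire difference between the two modes: an exclusive scan writes the accumulator before folding in the current coefficient, so each output omits it. A plain-array sketch of the resulting semantics:

#include <cstdio>

int main() {
  const float in[4] = {1, 2, 3, 4};
  float inclusive[4], exclusive[4], accum = 0;
  for (int i = 0; i < 4; ++i) {
    exclusive[i] = accum;   // exclusive: write first, then reduce
    accum += in[i];
    inclusive[i] = accum;   // inclusive: reduce first, then write
  }
  // inclusive: 1 3 6 10    exclusive: 0 1 3 6
  for (int i = 0; i < 4; ++i) std::printf("%g %g\n", inclusive[i], exclusive[i]);
}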
diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
index 8bc986c84..1369ca183 100644
--- a/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/ThreadPool/NonBlockingThreadPool.h
@@ -113,10 +113,10 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface {
typedef typename Environment::EnvThread Thread;
struct PerThread {
- bool inited;
- NonBlockingThreadPoolTempl* pool; // Parent pool, or null for normal threads.
- int thread_id; // Worker thread index in pool.
- unsigned rand; // Random generator state.
+ constexpr PerThread() : pool(NULL), thread_id(-1), rand(0) { }
+ NonBlockingThreadPoolTempl* pool; // Parent pool, or null for normal threads.
+ int thread_id; // Worker thread index in pool.
+ uint64_t rand; // Random generator state.
};
Environment env_;
@@ -133,6 +133,7 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface {
void WorkerLoop(int thread_id) {
PerThread* pt = GetPerThread();
pt->pool = this;
+ pt->rand = std::hash<std::thread::id>()(std::this_thread::get_id());
pt->thread_id = thread_id;
Queue* q = queues_[thread_id];
EventCount::Waiter* waiter = &waiters_[thread_id];
@@ -249,17 +250,18 @@ class NonBlockingThreadPoolTempl : public Eigen::ThreadPoolInterface {
return -1;
}
- PerThread* GetPerThread() {
+ static EIGEN_STRONG_INLINE PerThread* GetPerThread() {
EIGEN_THREAD_LOCAL PerThread per_thread_;
PerThread* pt = &per_thread_;
- if (pt->inited) return pt;
- pt->inited = true;
- pt->rand = static_cast<unsigned>(std::hash<std::thread::id>()(std::this_thread::get_id()));
return pt;
}
- static unsigned Rand(unsigned* state) {
- return *state = *state * 1103515245 + 12345;
+ static EIGEN_STRONG_INLINE unsigned Rand(uint64_t* state) {
+ uint64_t current = *state;
+ // Update the internal state
+ *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;
+ // Generate the random output (using the PCG-XSH-RS scheme)
+ return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61)));
}
};
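
The new Rand() is a PCG generator (an XSH-RS output function over a 64-bit LCG state), seeded per worker from std::hash of the thread id; it replaces the previous 32-bit LCG. A standalone sketch of the same step:

#include <cstdint>
#include <cstdio>

// Same update and output as Rand() above: 64-bit LCG state, PCG-XSH-RS output.
static unsigned pcg_xsh_rs(uint64_t* state) {
  const uint64_t current = *state;
  *state = current * 6364136223846793005ULL + 0xda3e39cb94b95bdbULL;  // LCG step
  // Xorshift the high bits, then apply a state-dependent random shift.
  return static_cast<unsigned>((current ^ (current >> 22)) >> (22 + (current >> 61)));
}

int main() {
  uint64_t state = 42;  // an arbitrary seed for the sketch
  for (int i = 0; i < 3; ++i) std::printf("%u\n", pcg_xsh_rs(&state));
}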
diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
index 089042751..feaeeaf5a 100755
--- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
+++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h
@@ -30,6 +30,13 @@ template<typename _DerType, bool Enable> struct auto_diff_special_op;
} // end namespace internal
+template<typename _DerType> class AutoDiffScalar;
+
+template<typename NewDerType>
+inline AutoDiffScalar<NewDerType> MakeAutoDiffScalar(const typename NewDerType::Scalar& value, const NewDerType &der) {
+ return AutoDiffScalar<NewDerType>(value,der);
+}
+
/** \class AutoDiffScalar
* \brief A scalar type replacement with automatic differentation capability
*
@@ -257,20 +264,16 @@ class AutoDiffScalar
-m_derivatives);
}
- inline const AutoDiffScalar<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DerType> >
+ inline const AutoDiffScalar<EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product) >
operator*(const Scalar& other) const
{
- return AutoDiffScalar<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DerType> >(
- m_value * other,
- (m_derivatives * other));
+ return MakeAutoDiffScalar(m_value * other, m_derivatives * other);
}
- friend inline const AutoDiffScalar<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DerType> >
+ friend inline const AutoDiffScalar<EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product) >
operator*(const Scalar& other, const AutoDiffScalar& a)
{
- return AutoDiffScalar<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DerType> >(
- a.value() * other,
- a.derivatives() * other);
+ return MakeAutoDiffScalar(a.value() * other, a.derivatives() * other);
}
// inline const AutoDiffScalar<typename CwiseUnaryOp<internal::scalar_multiple_op<Real>, DerType>::Type >
@@ -289,20 +292,16 @@ class AutoDiffScalar
// a.derivatives() * other);
// }
- inline const AutoDiffScalar<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DerType> >
+ inline const AutoDiffScalar<EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product) >
operator/(const Scalar& other) const
{
- return AutoDiffScalar<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DerType> >(
- m_value / other,
- (m_derivatives * (Scalar(1)/other)));
+ return MakeAutoDiffScalar(m_value / other, (m_derivatives * (Scalar(1)/other)));
}
- friend inline const AutoDiffScalar<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DerType> >
+ friend inline const AutoDiffScalar<EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product) >
operator/(const Scalar& other, const AutoDiffScalar& a)
{
- return AutoDiffScalar<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DerType> >(
- other / a.value(),
- a.derivatives() * (Scalar(-other) / (a.value()*a.value())));
+ return MakeAutoDiffScalar(other / a.value(), a.derivatives() * (Scalar(-other) / (a.value()*a.value())));
}
// inline const AutoDiffScalar<typename CwiseUnaryOp<internal::scalar_multiple_op<Real>, DerType>::Type >
@@ -322,34 +321,29 @@ class AutoDiffScalar
// }
template<typename OtherDerType>
- inline const AutoDiffScalar<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,
- const CwiseBinaryOp<internal::scalar_difference_op<Scalar>,
- const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DerType>,
- const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const typename internal::remove_all<OtherDerType>::type > > > >
+ inline const AutoDiffScalar<EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(
+ CwiseBinaryOp<internal::scalar_difference_op<Scalar> EIGEN_COMMA
+ const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product) EIGEN_COMMA
+ const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all<OtherDerType>::type,Scalar,product) >,Scalar,product) >
operator/(const AutoDiffScalar<OtherDerType>& other) const
{
internal::make_coherent(m_derivatives, other.derivatives());
- return AutoDiffScalar<CwiseUnaryOp<internal::scalar_multiple_op<Scalar>,
- const CwiseBinaryOp<internal::scalar_difference_op<Scalar>,
- const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DerType>,
- const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const typename internal::remove_all<OtherDerType>::type > > > >(
+ return MakeAutoDiffScalar(
m_value / other.value(),
- ((m_derivatives * other.value()) - (m_value * other.derivatives()))
+ ((m_derivatives * other.value()) - (other.derivatives() * m_value))
* (Scalar(1)/(other.value()*other.value())));
}
template<typename OtherDerType>
inline const AutoDiffScalar<CwiseBinaryOp<internal::scalar_sum_op<Scalar>,
- const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DerType>,
- const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const typename internal::remove_all<OtherDerType>::type> > >
+ const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(DerType,Scalar,product),
+ const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all<OtherDerType>::type,Scalar,product) > >
operator*(const AutoDiffScalar<OtherDerType>& other) const
{
internal::make_coherent(m_derivatives, other.derivatives());
- return AutoDiffScalar<const CwiseBinaryOp<internal::scalar_sum_op<Scalar>,
- const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const DerType>,
- const CwiseUnaryOp<internal::scalar_multiple_op<Scalar>, const typename internal::remove_all<OtherDerType>::type > > >(
+ return MakeAutoDiffScalar(
m_value * other.value(),
- (m_derivatives * other.value()) + (m_value * other.derivatives()));
+ (m_derivatives * other.value()) + (other.derivatives() * m_value));
}
inline AutoDiffScalar& operator*=(const Scalar& other)
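
Note the operand reorderings in the bodies above (e.g. `other.derivatives() * m_value` rather than `m_value * other.derivatives()`): they keep each derivative term in expression-times-scalar form, which is the shape EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE names; the arithmetic is unchanged. A small numeric check of the product and quotient rules these operators implement, again assuming the AutoDiff module:

#include <Eigen/Core>
#include <unsupported/Eigen/AutoDiff>
#include <iostream>

int main() {
  typedef Eigen::AutoDiffScalar<Eigen::Vector2d> AD;
  AD x(3.0, Eigen::Vector2d(1.0, 0.0));  // seed for d/dx
  AD y(2.0, Eigen::Vector2d(0.0, 1.0));  // seed for d/dy

  AD p = x * y;  // product rule: dp = y*dx + x*dy
  AD q = x / y;  // quotient rule: dq = (y*dx - x*dy) / y^2

  std::cout << p.derivatives().transpose() << "\n";  // 2 3
  std::cout << q.derivatives().transpose() << "\n";  // 0.5 -0.75
  return 0;
}
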
@@ -426,18 +420,18 @@ struct auto_diff_special_op<_DerType, true>
}
- inline const AutoDiffScalar<typename CwiseUnaryOp<scalar_multiple2_op<Scalar,Real>, DerType>::Type >
+ inline const AutoDiffScalar<typename CwiseUnaryOp<bind2nd_op<scalar_product_op<Scalar,Real> >, DerType>::Type >
operator*(const Real& other) const
{
- return AutoDiffScalar<typename CwiseUnaryOp<scalar_multiple2_op<Scalar,Real>, DerType>::Type >(
+ return AutoDiffScalar<typename CwiseUnaryOp<bind2nd_op<scalar_product_op<Scalar,Real> >, DerType>::Type >(
derived().value() * other,
derived().derivatives() * other);
}
- friend inline const AutoDiffScalar<typename CwiseUnaryOp<scalar_multiple2_op<Scalar,Real>, DerType>::Type >
+ friend inline const AutoDiffScalar<typename CwiseUnaryOp<bind1st_op<scalar_product_op<Real,Scalar> >, DerType>::Type >
operator*(const Real& other, const AutoDiffScalar<_DerType>& a)
{
- return AutoDiffScalar<typename CwiseUnaryOp<scalar_multiple2_op<Scalar,Real>, DerType>::Type >(
+ return AutoDiffScalar<typename CwiseUnaryOp<bind1st_op<scalar_product_op<Real,Scalar> >, DerType>::Type >(
a.value() * other,
a.derivatives() * other);
}
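
bind1st_op and bind2nd_op are the replacements for the removed scalar_multiple2_op: they adapt a binary functor such as scalar_product_op<Scalar,Real> into a unary one by pinning the first or second argument. A deliberately simplified, self-contained sketch of that shape (not Eigen's actual definitions):

#include <iostream>

// A toy binary functor in the mold of internal::scalar_product_op.
template <typename Scalar>
struct product_op {
  Scalar operator()(const Scalar& a, const Scalar& b) const { return a * b; }
};

// Rough shape of bind2nd_op: pin the second argument of a binary
// functor, yielding a unary functor usable with unaryExpr().
template <typename BinaryOp, typename Scalar>
struct bind2nd_sketch {
  explicit bind2nd_sketch(const Scalar& b) : m_b(b) {}
  Scalar operator()(const Scalar& a) const { return BinaryOp()(a, m_b); }
  Scalar m_b;
};

int main() {
  bind2nd_sketch<product_op<double>, double> times3(3.0);
  std::cout << times3(2.5) << "\n";  // 7.5
  return 0;
}
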
@@ -501,43 +495,43 @@ struct make_coherent_impl<Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows,
}
};
+} // end namespace internal
+
template<typename A_Scalar, int A_Rows, int A_Cols, int A_Options, int A_MaxRows, int A_MaxCols>
-struct scalar_product_traits<Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols>,A_Scalar>
+struct ScalarBinaryOpTraits<Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols>,A_Scalar>
{
enum { Defined = 1 };
typedef Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols> ReturnType;
};
template<typename A_Scalar, int A_Rows, int A_Cols, int A_Options, int A_MaxRows, int A_MaxCols>
-struct scalar_product_traits<A_Scalar, Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols> >
+struct ScalarBinaryOpTraits<A_Scalar, Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols> >
{
enum { Defined = 1 };
typedef Matrix<A_Scalar, A_Rows, A_Cols, A_Options, A_MaxRows, A_MaxCols> ReturnType;
};
template<typename DerType>
-struct scalar_product_traits<AutoDiffScalar<DerType>,typename DerType::Scalar>
+struct ScalarBinaryOpTraits<AutoDiffScalar<DerType>,typename DerType::Scalar>
{
enum { Defined = 1 };
typedef AutoDiffScalar<DerType> ReturnType;
};
template<typename DerType>
-struct scalar_product_traits<typename DerType::Scalar,AutoDiffScalar<DerType> >
+struct ScalarBinaryOpTraits<typename DerType::Scalar,AutoDiffScalar<DerType> >
{
enum { Defined = 1 };
typedef AutoDiffScalar<DerType> ReturnType;
};
-} // end namespace internal
-
#define EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(FUNC,CODE) \
template<typename DerType> \
- inline const Eigen::AutoDiffScalar<Eigen::CwiseUnaryOp<Eigen::internal::scalar_multiple_op<typename Eigen::internal::traits<typename Eigen::internal::remove_all<DerType>::type>::Scalar>, const typename Eigen::internal::remove_all<DerType>::type> > \
+ inline const Eigen::AutoDiffScalar< \
+ EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename Eigen::internal::remove_all<DerType>::type, typename Eigen::internal::traits<typename Eigen::internal::remove_all<DerType>::type>::Scalar, product) > \
FUNC(const Eigen::AutoDiffScalar<DerType>& x) { \
using namespace Eigen; \
typedef typename Eigen::internal::traits<typename Eigen::internal::remove_all<DerType>::type>::Scalar Scalar; \
- typedef AutoDiffScalar<CwiseUnaryOp<Eigen::internal::scalar_multiple_op<Scalar>, const typename Eigen::internal::remove_all<DerType>::type> > ReturnType; \
CODE; \
}
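
The block above tracks the Eigen 3.3 migration from internal::scalar_product_traits to the public ScalarBinaryOpTraits, which is why the namespace-closing brace moved before the specializations. The mechanism is opt-in: a specialization both declares that a mixed-scalar product is allowed and names its result type. A self-contained sketch of that idea with a hypothetical dual-number type (not Eigen's actual template, whose exact signature evolved during the 3.3 cycle):

#include <iostream>

struct Dual { double v, d; };  // hypothetical stand-in for AutoDiffScalar

Dual operator*(const Dual& a, double b) { return Dual{a.v * b, a.d * b}; }

// Primary template left undefined: mixing scalar types is opt-in.
template <typename A, typename B>
struct BinaryOpTraitsSketch;

// Declare that Dual * double is allowed and yields Dual, mirroring the
// ScalarBinaryOpTraits specializations in the patch.
template <>
struct BinaryOpTraitsSketch<Dual, double> {
  enum { Defined = 1 };
  typedef Dual ReturnType;
};

// Compiles only for pairs that have a specialization above.
template <typename A, typename B>
typename BinaryOpTraitsSketch<A, B>::ReturnType mixed_mul(const A& a, const B& b) {
  return a * b;
}

int main() {
  Dual y = mixed_mul(Dual{3.0, 1.0}, 2.0);
  std::cout << y.v << " " << y.d << "\n";  // 6 2
  return 0;
}
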
@@ -570,46 +564,45 @@ inline AutoDiffScalar<typename Eigen::internal::remove_all<DerType>::type::Plain
EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(abs,
using std::abs;
- return ReturnType(abs(x.value()), x.derivatives() * (x.value()<0 ? -1 : 1) );)
+ return Eigen::MakeAutoDiffScalar(abs(x.value()), x.derivatives() * (x.value()<0 ? -1 : 1) );)
EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(abs2,
using numext::abs2;
- return ReturnType(abs2(x.value()), x.derivatives() * (Scalar(2)*x.value()));)
+ return Eigen::MakeAutoDiffScalar(abs2(x.value()), x.derivatives() * (Scalar(2)*x.value()));)
EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(sqrt,
using std::sqrt;
Scalar sqrtx = sqrt(x.value());
- return ReturnType(sqrtx,x.derivatives() * (Scalar(0.5) / sqrtx));)
+ return Eigen::MakeAutoDiffScalar(sqrtx,x.derivatives() * (Scalar(0.5) / sqrtx));)
EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(cos,
using std::cos;
using std::sin;
- return ReturnType(cos(x.value()), x.derivatives() * (-sin(x.value())));)
+ return Eigen::MakeAutoDiffScalar(cos(x.value()), x.derivatives() * (-sin(x.value())));)
EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(sin,
using std::sin;
using std::cos;
- return ReturnType(sin(x.value()),x.derivatives() * cos(x.value()));)
+ return Eigen::MakeAutoDiffScalar(sin(x.value()),x.derivatives() * cos(x.value()));)
EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(exp,
using std::exp;
Scalar expx = exp(x.value());
- return ReturnType(expx,x.derivatives() * expx);)
+ return Eigen::MakeAutoDiffScalar(expx,x.derivatives() * expx);)
EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(log,
using std::log;
- return ReturnType(log(x.value()),x.derivatives() * (Scalar(1)/x.value()));)
+ return Eigen::MakeAutoDiffScalar(log(x.value()),x.derivatives() * (Scalar(1)/x.value()));)
template<typename DerType>
-inline const Eigen::AutoDiffScalar<Eigen::CwiseUnaryOp<Eigen::internal::scalar_multiple_op<typename internal::traits<typename internal::remove_all<DerType>::type>::Scalar>, const typename internal::remove_all<DerType>::type> >
-pow(const Eigen::AutoDiffScalar<DerType>& x, const typename internal::traits<typename internal::remove_all<DerType>::type>::Scalar &y)
+inline const Eigen::AutoDiffScalar<
+EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(typename internal::remove_all<DerType>::type,typename internal::traits<typename internal::remove_all<DerType>::type>::Scalar,product) >
+pow(const Eigen::AutoDiffScalar<DerType> &x, const typename internal::traits<typename internal::remove_all<DerType>::type>::Scalar &y)
{
using namespace Eigen;
typedef typename internal::remove_all<DerType>::type DerTypeCleaned;
typedef typename Eigen::internal::traits<DerTypeCleaned>::Scalar Scalar;
- return AutoDiffScalar<CwiseUnaryOp<Eigen::internal::scalar_multiple_op<Scalar>, const DerTypeCleaned> >(
- std::pow(x.value(),y),
- x.derivatives() * (y * std::pow(x.value(),y-1)));
+ return Eigen::MakeAutoDiffScalar(std::pow(x.value(),y), x.derivatives() * (y * std::pow(x.value(),y-1)));
}
@@ -634,17 +627,17 @@ atan2(const AutoDiffScalar<DerTypeA>& a, const AutoDiffScalar<DerTypeB>& b)
EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(tan,
using std::tan;
using std::cos;
- return ReturnType(tan(x.value()),x.derivatives() * (Scalar(1)/numext::abs2(cos(x.value()))));)
+ return Eigen::MakeAutoDiffScalar(tan(x.value()),x.derivatives() * (Scalar(1)/numext::abs2(cos(x.value()))));)
EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(asin,
using std::sqrt;
using std::asin;
- return ReturnType(asin(x.value()),x.derivatives() * (Scalar(1)/sqrt(1-numext::abs2(x.value()))));)
+ return Eigen::MakeAutoDiffScalar(asin(x.value()),x.derivatives() * (Scalar(1)/sqrt(1-numext::abs2(x.value()))));)
EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY(acos,
using std::sqrt;
using std::acos;
- return ReturnType(acos(x.value()),x.derivatives() * (Scalar(-1)/sqrt(1-numext::abs2(x.value()))));)
+ return Eigen::MakeAutoDiffScalar(acos(x.value()),x.derivatives() * (Scalar(-1)/sqrt(1-numext::abs2(x.value()))));)
#undef EIGEN_AUTODIFF_DECLARE_GLOBAL_UNARY
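
Every instantiation of the macro encodes the chain rule d f(x) = f'(x) * dx, with the derivative expression now built by MakeAutoDiffScalar instead of the removed ReturnType typedef. A quick numeric spot-check of two of these, assuming an Eigen tree with this patch:

#include <Eigen/Core>
#include <unsupported/Eigen/AutoDiff>
#include <cmath>
#include <iostream>

int main() {
  typedef Eigen::Matrix<double, 1, 1> Der;
  typedef Eigen::AutoDiffScalar<Der> AD;
  AD x(0.5, Der::Ones());

  AD s = sin(x);       // expected derivative: cos(0.5)
  AD p = pow(x, 3.0);  // expected derivative: 3 * 0.5^2 = 0.75

  std::cout << s.derivatives()(0) - std::cos(0.5) << "\n";  // ~0
  std::cout << p.derivatives()(0) - 0.75 << "\n";           // ~0
  return 0;
}
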
diff --git a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
index bf9727c21..582fa8512 100644
--- a/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
+++ b/unsupported/Eigen/src/KroneckerProduct/KroneckerTensorProduct.h
@@ -203,7 +203,7 @@ struct traits<KroneckerProduct<_Lhs,_Rhs> >
{
typedef typename remove_all<_Lhs>::type Lhs;
typedef typename remove_all<_Rhs>::type Rhs;
- typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
+ typedef typename ScalarBinaryOpTraits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
typedef typename promote_index_type<typename Lhs::StorageIndex, typename Rhs::StorageIndex>::type StorageIndex;
enum {
@@ -222,7 +222,7 @@ struct traits<KroneckerProductSparse<_Lhs,_Rhs> >
typedef MatrixXpr XprKind;
typedef typename remove_all<_Lhs>::type Lhs;
typedef typename remove_all<_Rhs>::type Rhs;
- typedef typename scalar_product_traits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
+ typedef typename ScalarBinaryOpTraits<typename Lhs::Scalar, typename Rhs::Scalar>::ReturnType Scalar;
typedef typename cwise_promote_storage_type<typename traits<Lhs>::StorageKind, typename traits<Rhs>::StorageKind, scalar_product_op<typename Lhs::Scalar, typename Rhs::Scalar> >::ret StorageKind;
typedef typename promote_index_type<typename Lhs::StorageIndex, typename Rhs::StorageIndex>::type StorageIndex;
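
With this traits switch, the Kronecker product's result scalar is whatever ScalarBinaryOpTraits reports for the two operand scalars, which in particular should let real and complex operands mix under Eigen 3.3's default traits. A hedged usage sketch (untested against this exact revision):

#include <Eigen/Core>
#include <unsupported/Eigen/KroneckerProduct>
#include <complex>
#include <iostream>

int main() {
  Eigen::Matrix2d a;
  a << 1, 2,
       3, 4;
  Eigen::Matrix2cd b =
      Eigen::Matrix2cd::Identity() * std::complex<double>(0.0, 1.0);

  // double (x) complex<double>: the result scalar comes from
  // ScalarBinaryOpTraits<double, std::complex<double> >.
  Eigen::Matrix4cd k = Eigen::kroneckerProduct(a, b);
  std::cout << k << "\n";
  return 0;
}
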