author    Ville Kallioniemi <ville.kallioniemi@gmail.com>  2016-02-01 19:32:31 -0700
committer Ville Kallioniemi <ville.kallioniemi@gmail.com>  2016-02-01 19:32:31 -0700
commit    f0fdefa96fdbdafe0daaa47a2dd54b9e77cf9716 (patch)
tree      e175666422c11be478eaaf68c934e1fc913fec6f /unsupported/Eigen/CXX11/src/Tensor
parent    02db1228ed9ca3728ae0685a5e1602fe7299ae50 (diff)
parent    64ce78c2ec52aa2fd2e408c7c4160b06e8fc1a03 (diff)
Rebase to latest.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor')
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorBase.h                    6
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h             8
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h     8
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h       5
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h   8
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h             8
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h             12
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h                  2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h                4
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h              2
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h              12
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h                 6
-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h                 4
13 files changed, 48 insertions, 37 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
index 392acf302..cca716d6f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h
@@ -129,6 +129,12 @@ class TensorBase<Derived, ReadOnlyAccessors>
}
EIGEN_DEVICE_FUNC
+ EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_digamma_op<Scalar>, const Derived>
+ digamma() const {
+ return unaryExpr(internal::scalar_digamma_op<Scalar>());
+ }
+
+ EIGEN_DEVICE_FUNC
EIGEN_STRONG_INLINE const TensorCwiseUnaryOp<internal::scalar_erf_op<Scalar>, const Derived>
erf() const {
return unaryExpr(internal::scalar_erf_op<Scalar>());
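For context, a minimal usage sketch of the new coefficient-wise op (assumes only the unsupported Tensor header; digamma() follows the same pattern as erf() just below it):

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      Eigen::Tensor<float, 2> t(2, 2);
      t.setValues({{1.f, 2.f}, {3.f, 4.f}});
      Eigen::Tensor<float, 2> d = t.digamma();  // coefficient-wise psi(x)
      std::cout << d << std::endl;
    }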
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
index 624e814e2..1adb68894 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h
@@ -378,7 +378,7 @@ struct TensorContractionEvaluatorBase
}
template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
- void evalGemv(Scalar* buffer) const {
+ EIGEN_DEVICE_FUNC void evalGemv(Scalar* buffer) const {
const Index rows = m_i_size;
const Index cols = m_k_size;
@@ -516,7 +516,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
Base(op, device) { }
template <bool lhs_inner_dim_contiguous, bool rhs_inner_dim_contiguous, bool rhs_inner_dim_reordered, int Alignment>
- void evalProduct(Scalar* buffer) const {
+ EIGEN_DEVICE_FUNC void evalProduct(Scalar* buffer) const {
if (this->m_j_size == 1) {
this->template evalGemv<lhs_inner_dim_contiguous, rhs_inner_dim_contiguous, rhs_inner_dim_reordered, Alignment>(buffer);
return;
@@ -582,10 +582,8 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
OutputMapper output(buffer, m);
- typedef typename internal::gemm_blocking_space<ColMajor, LhsScalar, RhsScalar, Dynamic, Dynamic, Dynamic> BlockingType;
-
// Sizes of the blocks to load in cache. See the Goto paper for details.
- BlockingType blocking(m, n, k, 1, true);
+ internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index, internal::ShardByCol> blocking(k, m, n, 1);
const Index kc = blocking.kc();
const Index mc = numext::mini(m, blocking.mc());
const Index nc = numext::mini(n, blocking.nc());
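A usage sketch of the path these hunks touch: contracting a rank-2 tensor with a rank-1 tensor makes m_j_size == 1, so evalProduct dispatches to the evalGemv fast path annotated above (illustrative values, not from the commit):

    Eigen::Tensor<float, 2> a(3, 4);
    Eigen::Tensor<float, 1> v(4);
    a.setRandom();
    v.setRandom();
    Eigen::array<Eigen::IndexPair<int>, 1> dims = {{Eigen::IndexPair<int>(1, 0)}};
    Eigen::Tensor<float, 1> r = a.contract(v, dims);  // matrix*vector: the GEMV path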
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h
index 78ed5038f..3d3f6904f 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h
@@ -28,7 +28,7 @@ class TensorContractionBlocking {
typedef typename LhsMapper::Scalar LhsScalar;
typedef typename RhsMapper::Scalar RhsScalar;
- TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1) :
+ EIGEN_DEVICE_FUNC TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1) :
kc_(k), mc_(m), nc_(n)
{
if (ShardingType == ShardByCol) {
@@ -41,9 +41,9 @@ class TensorContractionBlocking {
}
}
- EIGEN_ALWAYS_INLINE Index kc() const { return kc_; }
- EIGEN_ALWAYS_INLINE Index mc() const { return mc_; }
- EIGEN_ALWAYS_INLINE Index nc() const { return nc_; }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index kc() const { return kc_; }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index mc() const { return mc_; }
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Index nc() const { return nc_; }
private:
Index kc_;
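The EIGEN_DEVICE_FUNC additions are needed because device code may only call device-callable functions, and the contraction evaluators above are now annotated. A self-contained sketch of that rule (hypothetical Blocking type, not Eigen code):

    #ifdef __CUDACC__
    #define DEVICE_FUNC __host__ __device__
    #else
    #define DEVICE_FUNC  // expands to nothing in a plain C++ build
    #endif

    struct Blocking {
      DEVICE_FUNC Blocking(int k, int m, int n) : kc_(k), mc_(m), nc_(n) {}
      DEVICE_FUNC int kc() const { return kc_; }  // accessors need the annotation too
      DEVICE_FUNC int mc() const { return mc_; }
      DEVICE_FUNC int nc() const { return nc_; }
     private:
      int kc_, mc_, nc_;
    };

    // Compiles for host and device precisely because everything it calls is
    // annotated; remove one DEVICE_FUNC above and nvcc rejects this function.
    DEVICE_FUNC int panelVolume(int k, int m, int n) {
      Blocking b(k, m, n);
      return b.kc() * b.mc() * b.nc();
    }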
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h
index 9b6d18090..63c8ae126 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionMapper.h
@@ -426,15 +426,16 @@ class TensorContractionSubMapper {
};
-template<typename Scalar, typename Index, int side,
+template<typename Scalar_, typename Index, int side,
typename Tensor,
typename nocontract_t, typename contract_t,
int packet_size,
bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
class TensorContractionInputMapper
- : public BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> {
+ : public BaseTensorContractionMapper<Scalar_, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> {
public:
+ typedef Scalar_ Scalar;
typedef BaseTensorContractionMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Base;
typedef TensorContractionSubMapper<Scalar, Index, side, Tensor, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper;
typedef SubMapper VectorMapper;
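The trailing-underscore rename is the standard idiom for re-exporting a template parameter under its public name: a parameter literally named Scalar could not coexist with a member typedef Scalar, since a member may not redeclare a template parameter's name. Minimal sketch:

    template <typename T> struct Base { T value; };

    template <typename Scalar_>
    struct Mapper : Base<Scalar_> {
      typedef Scalar_ Scalar;  // public alias; legal only because the
                               // parameter itself is spelled Scalar_
    };

    Mapper<float>::Scalar x = 1.0f;  // the name client code actually uses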
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
index 576bea295..51a3b9490 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionThreadPool.h
@@ -176,10 +176,10 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
// compute block sizes (which depend on number of threads)
const Index num_threads = this->m_device.numThreads();
- Index mc = m;
- Index nc = n;
- Index kc = k;
- internal::computeProductBlockingSizes<LhsScalar,RhsScalar,1>(kc, mc, nc, num_threads);
+ internal::TensorContractionBlocking<LhsMapper, RhsMapper, Index, internal::ShardByCol> blocking(k, m, n, num_threads);
+ Index mc = blocking.mc();
+ Index nc = blocking.nc();
+ Index kc = blocking.kc();
eigen_assert(mc <= m);
eigen_assert(nc <= n);
eigen_assert(kc <= k);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
index 367a152a0..67c797802 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h
@@ -21,7 +21,7 @@ namespace Eigen {
*/
namespace internal {
-template <typename Index, typename InputDims, size_t NumKernelDims, int Layout>
+template <typename Index, typename InputDims, int NumKernelDims, int Layout>
class IndexMapper {
public:
IndexMapper(const InputDims& input_dims, const array<Index, NumKernelDims>& kernel_dims,
@@ -123,7 +123,7 @@ class IndexMapper {
}
inputIndex += p * m_inputStrides[NumKernelDims];
} else {
- int limit = 0;
+ std::ptrdiff_t limit = 0;
if (NumKernelDims < NumDims) {
limit = NumDims - NumKernelDims - 1;
}
@@ -147,7 +147,7 @@ class IndexMapper {
}
outputIndex += p * m_outputStrides[NumKernelDims];
} else {
- int limit = 0;
+ std::ptrdiff_t limit = 0;
if (NumKernelDims < NumDims) {
limit = NumDims - NumKernelDims - 1;
}
@@ -206,7 +206,7 @@ class IndexMapper {
}
private:
- static const size_t NumDims = internal::array_size<InputDims>::value;
+ static const int NumDims = internal::array_size<InputDims>::value;
array<Index, NumDims> m_inputStrides;
array<Index, NumDims> m_outputStrides;
array<Index, NumDims> m_cudaInputStrides;
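These hunks align the signedness of NumDims, NumKernelDims, and limit, so expressions like NumDims - NumKernelDims - 1 and comparisons like NumKernelDims < NumDims no longer mix signed and unsigned operands. A two-line illustration of the hazard being removed:

    #include <cstddef>
    static_assert(2 - 3 == -1, "signed subtraction behaves as expected");
    static_assert(std::size_t(2) - 3 > 1000000, "unsigned subtraction wraps to a huge value");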
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
index 5abdc489b..e684ab8f7 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h
@@ -109,10 +109,12 @@ class CudaStreamDevice : public StreamInterface {
struct GpuDevice {
// The StreamInterface is not owned: the caller is
// responsible for its initialization and eventual destruction.
- explicit GpuDevice(const StreamInterface* stream) : stream_(stream) {
+ explicit GpuDevice(const StreamInterface* stream) : stream_(stream), max_blocks_(INT_MAX) {
+ eigen_assert(stream);
+ }
+ explicit GpuDevice(const StreamInterface* stream, int num_blocks) : stream_(stream), max_blocks_(num_blocks) {
eigen_assert(stream);
}
-
// TODO(bsteiner): This is an internal API, we should not expose it.
EIGEN_STRONG_INLINE const cudaStream_t& stream() const {
return stream_->stream();
@@ -246,6 +248,10 @@ struct GpuDevice {
#endif
}
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE int maxBlocks() const {
+ return max_blocks_;
+ }
+
// This function checks if the CUDA runtime recorded an error for the
// underlying stream device.
inline bool ok() const {
@@ -259,7 +265,7 @@ struct GpuDevice {
private:
const StreamInterface* stream_;
-
+ int max_blocks_;
};
#ifndef __CUDA_ARCH__
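A hedged sketch of the new two-argument constructor (CUDA build assumed; the cap of 64 is illustrative). The stored limit feeds the TensorExecutor change further down:

    Eigen::CudaStreamDevice stream;                       // wraps the default CUDA stream
    Eigen::GpuDevice device(&stream, /*num_blocks=*/64);  // caps kernel launch size
    const int cap = device.maxBlocks();                   // 64 here; INT_MAX if omitted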
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
index e7daf7304..bd83d5de8 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h
@@ -136,7 +136,7 @@ struct TensorEvaluator<const TensorEvalToOp<ArgType>, Device>
}
template<int LoadMode>
- EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
return internal::ploadt<Packet, LoadMode>(m_buffer + index);
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
index d2ab70f2b..df15c6204 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h
@@ -220,7 +220,7 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, false>::run(
if (needs_assign)
{
const int block_size = device.maxCudaThreadsPerBlock();
- const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size;
+ const int max_blocks = numext::maxi<int>(device.maxBlocks(), device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size);
const Index size = array_prod(evaluator.dimensions());
// Create at least one block to ensure we won't crash if we're called with tensors of size 0.
const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1);
@@ -239,7 +239,7 @@ EIGEN_DEVICE_FUNC inline void TensorExecutor<Expression, GpuDevice, true>::run(c
if (needs_assign)
{
const int block_size = device.maxCudaThreadsPerBlock();
- const int max_blocks = device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size;
+ const int max_blocks = numext::maxi<int>(device.maxBlocks(), device.getNumCudaMultiProcessors() * device.maxCudaThreadsPerMultiProcessor() / block_size);
const Index size = array_prod(evaluator.dimensions());
// Create at least one block to ensure we won't crash if we're called with tensors of size 0.
const int num_blocks = numext::maxi<int>(numext::mini<int>(max_blocks, (size + block_size - 1) / block_size), 1);
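A worked instance of the launch-shape arithmetic above, with illustrative numbers and std::min/std::max standing in for numext::mini/maxi:

    #include <algorithm>

    const int block_size = 1024;   // device.maxCudaThreadsPerBlock()
    const int max_blocks = 65535;  // result of the maxi() above
    const int size = 1 << 20;      // number of tensor coefficients
    // Ceil-divide into blocks, clamp to max_blocks, never launch zero blocks:
    const int num_blocks =
        std::max(std::min(max_blocks, (size + block_size - 1) / block_size), 1);
    // num_blocks == 1024 here; a size-0 tensor still yields num_blocks == 1.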
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
index c9b0b2f28..58b864787 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h
@@ -106,7 +106,6 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); }
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
- m_impl.evalSubExprsIfNeeded(NULL);
const Index numValues = m_impl.dimensions().TotalSize();
m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType));
// Should initialize the memory in case we're dealing with non-POD types.
@@ -119,7 +118,6 @@ struct TensorEvaluator<const TensorForcedEvalOp<ArgType>, Device>
EvalTo evalToTmp(m_buffer, m_op);
const bool PacketAccess = internal::IsVectorizable<Device, const ArgType>::value;
internal::TensorExecutor<const EvalTo, Device, PacketAccess>::run(evalToTmp, m_device);
- m_impl.cleanup();
return true;
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
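The deleted pair was redundant rather than load-bearing: TensorExecutor::run already drives the full evaluator lifecycle for evalToTmp, which wraps m_impl, so the argument expression was evaluated and cleaned up twice. The executor's default-device loop has roughly this shape (paraphrased sketch, not a verbatim quote):

    TensorEvaluator<const EvalTo, Device> evaluator(evalToTmp, device);
    const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);  // recurses into m_impl
    if (needs_assign) {
      const Index size = array_prod(evaluator.dimensions());
      for (Index i = 0; i < size; ++i) evaluator.evalScalar(i);
    }
    evaluator.cleanup();                                             // recurses into m_impl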
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index 09ee0c2c6..22aea5ea4 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -345,8 +345,8 @@ template <typename Self, typename Op, typename Device>
struct InnerReducer {
static const bool HasOptimizedImplementation = false;
- static void run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) {
- assert(false && "Not implemented");
+ EIGEN_DEVICE_FUNC static void run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) {
+ eigen_assert(false && "Not implemented");
}
};
@@ -355,8 +355,8 @@ template <typename Self, typename Op, typename Device>
struct OuterReducer {
static const bool HasOptimizedImplementation = false;
- static void run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) {
- assert(false && "Not implemented");
+ EIGEN_DEVICE_FUNC static void run(const Self&, Op&, const Device&, typename Self::CoeffReturnType*, typename Self::Index, typename Self::Index) {
+ eigen_assert(false && "Not implemented");
}
};
@@ -463,7 +463,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1];
}
} else {
- m_outputStrides[NumOutputDims - 1] = 1;
+ m_outputStrides.back() = 1;
for (int i = NumOutputDims - 2; i >= 0; --i) {
m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1];
}
@@ -479,7 +479,7 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType>, Device>
input_strides[i] = input_strides[i-1] * input_dims[i-1];
}
} else {
- input_strides[NumInputDims - 1] = 1;
+ input_strides.back() = 1;
for (int i = NumInputDims - 2; i >= 0; --i) {
input_strides[i] = input_strides[i + 1] * input_dims[i + 1];
}
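The back() form presumably matters for full reductions, where the output is rank 0: there NumOutputDims - 1 underflows as an index, while back() stays expressible against Eigen's minimum-size-one array storage (see the TensorStorage hunk below). The path is exercised by an ordinary full sum:

    Eigen::Tensor<float, 3> t(4, 4, 4);
    t.setRandom();
    Eigen::Tensor<float, 0> total = t.sum();  // full reduction: rank-0 output
    float value = total();                    // a rank-0 tensor is read with ()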
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
index 98631fc7f..ed933b6ac 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorStorage.h
@@ -41,7 +41,10 @@ class TensorStorage<T, FixedDimensions, Options_>
private:
static const std::size_t Size = FixedDimensions::total_size;
- EIGEN_ALIGN_MAX T m_data[Size];
+ // Allocate an array of size at least one to prevent compiler warnings.
+ static const std::size_t MinSize = max_n_1<Size>::size;
+ EIGEN_ALIGN_MAX T m_data[MinSize];
+
FixedDimensions m_dimensions;
public:
@@ -105,7 +108,6 @@ class TensorStorage<T, DSizes<IndexType, NumIndices_>, Options_>
EIGEN_DEVICE_FUNC void resize(Index size, const array<Index, NumIndices_>& nbDimensions)
{
- eigen_assert(size >= 1);
const Index currentSz = internal::array_prod(m_dimensions);
if(size != currentSz)
{
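Taken together, the two hunks legalize genuinely empty tensors: fixed-size storage pads a zero-length array up to one element instead of declaring T m_data[0], and resize() no longer asserts on a total size of 0. A hedged sketch of what this permits:

    Eigen::Tensor<float, 1> empty(0);  // zero-sized dynamic tensor
    empty.resize(0);                   // previously tripped eigen_assert(size >= 1)
    Eigen::TensorFixedSize<float, Eigen::Sizes<0>> fixed_empty;  // storage clamped to one element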
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
index 19352eb5e..0d34f7ee6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorUInt128.h
@@ -34,11 +34,11 @@ struct TensorUInt128
LOW low;
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- TensorUInt128(int x) : high(0), low(x) {
+ TensorUInt128(int32_t x) : high(0), low(x) {
eigen_assert(x >= 0);
}
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
- TensorUInt128(unsigned int x) : high(0), low(x) { }
+ TensorUInt128(uint32_t x) : high(0), low(x) { }
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
TensorUInt128(long x) : high(0), low(x) {
eigen_assert(x >= 0);
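The fixed-width overloads make the constructor widths explicit: int and unsigned int are only guaranteed to be at least 16 bits, while int32_t/uint32_t are exactly 32 bits everywhere, keeping these overloads cleanly distinct from the long overloads that follow. A standard-C++ illustration (not commit code):

    #include <cstdint>
    static_assert(sizeof(std::int32_t) == 4, "exact width by definition");
    // sizeof(int) is ABI-defined: 4 on common platforms, but only >= 2 is guaranteed.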