aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-08-26 09:47:18 -0700
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2014-08-26 09:47:18 -0700
commit2959045f2fe111f93b23517fd6f7afe49720a290 (patch)
treefa1c8c1c77902b31cb0d0735c2d5098a0381fa71 /unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
parent36fffe48f7231e07915ec231d33cf46faa0fa918 (diff)
Optimized the tensor padding code.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h95
1 files changed, 81 insertions, 14 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
index 45558d7dd..4482c0992 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPadding.h
@@ -87,7 +87,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
enum {
IsAligned = false,
- PacketAccess = /*TensorEvaluator<ArgType, Device>::PacketAccess*/false,
+ PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
};
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
@@ -100,15 +100,13 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
}
const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
- for (int i = 0; i < NumDims; ++i) {
- if (i > 0) {
- m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
- m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
- } else {
- m_inputStrides[0] = 1;
- m_outputStrides[0] = 1;
- }
+ m_inputStrides[0] = 1;
+ m_outputStrides[0] = 1;
+ for (int i = 1; i < NumDims; ++i) {
+ m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
+ m_outputStrides[i] = m_outputStrides[i-1] * m_dimensions[i-1];
}
+ m_outputStrides[NumDims] = m_outputStrides[NumDims-1] * m_dimensions[NumDims-1];
}
typedef typename XprType::Scalar Scalar;
@@ -128,7 +126,7 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
{
Index inputIndex = 0;
- for (int i = NumDims - 1; i >= 0; --i) {
+ for (int i = NumDims - 1; i > 0; --i) {
const Index idx = index / m_outputStrides[i];
if (idx < m_padding[i].first || idx >= m_dimensions[i] - m_padding[i].second) {
return Scalar(0);
@@ -136,21 +134,90 @@ struct TensorEvaluator<const TensorPaddingOp<PaddingDimensions, ArgType>, Device
inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
index -= idx * m_outputStrides[i];
}
+ if (index < m_padding[0].first || index >= m_dimensions[0] - m_padding[0].second) {
+ return Scalar(0);
+ }
+ inputIndex += (index - m_padding[0].first);
return m_impl.coeff(inputIndex);
}
- /* template<int LoadMode>
+ template<int LoadMode>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
{
- return m_impl.template packet<LoadMode>(index);
- }*/
+ static const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
+ EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
+ eigen_assert(index+packetSize-1 < dimensions().TotalSize());
+ // Returns output coefficients [index, index+packetSize): zeros when all are padding, an input load when all are interior, else a per-coefficient fallback.
+ const Index initialIndex = index;
+ Index inputIndex = 0;
+ for (int i = NumDims - 1; i > 0; --i) {
+ const Index first = index;  // Index rather than int, so large offsets are not truncated.
+ const Index last = index + packetSize - 1;
+ const Index lastPaddedLeft = m_padding[i].first * m_outputStrides[i];
+ const Index firstPaddedRight = (m_dimensions[i] - m_padding[i].second) * m_outputStrides[i];
+ const Index lastPaddedRight = m_outputStrides[i+1];
+
+ if (last < lastPaddedLeft) {
+ // all the coefficients are in the left padding zone of dimension i.
+ return internal::pset1<PacketReturnType>(Scalar(0));
+ }
+ else if (first >= firstPaddedRight && last < lastPaddedRight) {
+ // all the coefficients are in the right padding zone of dimension i.
+ return internal::pset1<PacketReturnType>(Scalar(0));
+ }
+ else if (first >= lastPaddedLeft && last < firstPaddedRight) {
+ // all the coefficients are between the 2 padding zones: peel off dimension i.
+ const Index idx = index / m_outputStrides[i];
+ inputIndex += (idx - m_padding[i].first) * m_inputStrides[i];
+ index -= idx * m_outputStrides[i];
+ }
+ else {
+ // The packet straddles a padding boundary: build it coefficient by coefficient.
+ return packetWithPossibleZero(initialIndex);
+ }
+ }
+
+ const Index last = index + packetSize - 1;
+ const Index first = index;
+ const Index lastPaddedLeft = m_padding[0].first;
+ const Index firstPaddedRight = (m_dimensions[0] - m_padding[0].second);
+ const Index lastPaddedRight = m_outputStrides[1];
+
+ if (last < lastPaddedLeft) {
+ // all the coefficients are in the left padding zone of dimension 0.
+ return internal::pset1<PacketReturnType>(Scalar(0));
+ }
+ else if (first >= firstPaddedRight && last < lastPaddedRight) {
+ // all the coefficients are in the right padding zone of dimension 0.
+ return internal::pset1<PacketReturnType>(Scalar(0));
+ }
+ else if (first >= lastPaddedLeft && last < firstPaddedRight) {
+ // all the coefficients are between the 2 padding zones.
+ inputIndex += (index - m_padding[0].first);
+ return m_impl.template packet<Unaligned>(inputIndex);  // LoadMode is not used; the input load is always Unaligned.
+ }
+ // Every other case: the packet straddles a padding boundary in dimension 0.
+ return packetWithPossibleZero(initialIndex);
+ }
Scalar* data() const { return NULL; }
protected:
+
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetWithPossibleZero(Index index) const
+ {
+ // Slow path: fill every lane through coeff(), which returns Scalar(0) inside the padding.
+ static const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
+ EIGEN_ALIGN_DEFAULT typename internal::remove_const<CoeffReturnType>::type lanes[packetSize];
+ for (int lane = 0; lane < packetSize; ++lane) {
+ lanes[lane] = coeff(index + lane);
+ }
+ return internal::pload<PacketReturnType>(lanes);
+ }
+
PaddingDimensions m_padding;
Dimensions m_dimensions;
- array<Index, NumDims> m_outputStrides;
+ array<Index, NumDims+1> m_outputStrides;
array<Index, NumDims> m_inputStrides;
TensorEvaluator<ArgType, Device> m_impl;
};