aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
diff options
context:
space:
mode:
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h110
1 files changed, 106 insertions, 4 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index b6c93aff9..9ab6b3565 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -105,6 +105,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
+ bool nByOne = false, oneByN = false;
enum {
IsAligned = true,
@@ -142,6 +143,24 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
m_outputStrides[i] = m_outputStrides[i+1] * m_dimensions[i+1];
}
}
+
+ if (input_dims[0] == 1) {
+ oneByN = true;
+ for (int i = 1; i < NumDims; ++i) {
+ if (broadcast[i] != 1) {
+ oneByN = false;
+ break;
+ }
+ }
+ } else if (input_dims[NumDims-1] == 1) {
+ nByOne = true;
+ for (int i = 0; i < NumDims-1; ++i) {
+ if (broadcast[i] != 1) {
+ nByOne = false;
+ break;
+ }
+ }
+ }
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -237,9 +256,84 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
}
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- return packetColMajor<LoadMode>(index);
+ if (oneByN) {
+ return packetNByOne<LoadMode>(index);
+ } else if (nByOne) {
+ return packetOneByN<LoadMode>(index);
+ } else {
+ return packetColMajor<LoadMode>(index);
+ }
} else {
- return packetRowMajor<LoadMode>(index);
+ if (oneByN) {
+ return packetOneByN<LoadMode>(index);
+ } else if (nByOne) {
+ return packetNByOne<LoadMode>(index);
+ } else {
+ return packetRowMajor<LoadMode>(index);
+ }
+ }
+ }
+
+ template<int LoadMode>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByN(Index index) const
+ {
+ EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
+ eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
+
+ Index dim, inputIndex;
+
+ if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+ dim = NumDims - 1;
+ } else {
+ dim = 0;
+ }
+
+ inputIndex = index % m_inputStrides[dim];
+ if (inputIndex + PacketSize <= m_inputStrides[dim]) {
+ return m_impl.template packet<Unaligned>(inputIndex);
+ } else {
+ EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+ for (int i = 0; i < PacketSize; ++i) {
+ if (inputIndex > m_inputStrides[dim]-1) {
+ inputIndex = 0;
+ }
+ values[i] = m_impl.coeff(inputIndex++);
+ }
+ return internal::pload<PacketReturnType>(values);
+ }
+ }
+
+ template<int LoadMode>
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetNByOne(Index index) const
+ {
+ EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
+ eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
+
+ EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+ Index dim, inputIndex, outputOffset;
+
+ if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+ dim = 1;
+ } else {
+ dim = NumDims - 2;
+ }
+
+ inputIndex = index / m_outputStrides[dim];
+ outputOffset = index % m_outputStrides[dim];
+ if (outputOffset + PacketSize <= m_outputStrides[dim]) {
+ values[0] = m_impl.coeff(inputIndex);
+ return internal::pload1<PacketReturnType>(values);
+ } else {
+ for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) {
+ if (outputOffset + cur < m_outputStrides[dim]) {
+ values[i] = m_impl.coeff(inputIndex);
+ } else {
+ values[i] = m_impl.coeff(++inputIndex);
+ outputOffset = 0;
+ cur = 0;
+ }
+ }
+ return internal::pload<PacketReturnType>(values);
}
}
@@ -290,7 +384,11 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
values[0] = m_impl.coeff(inputIndex);
for (int i = 1; i < PacketSize; ++i) {
- values[i] = coeffColMajor(originalIndex+i);
+ if (innermostLoc + i < m_impl.dimensions()[0]) {
+ values[i] = m_impl.coeff(inputIndex+i);
+ } else {
+ values[i] = coeffColMajor(originalIndex+i);
+ }
}
PacketReturnType rslt = internal::pload<PacketReturnType>(values);
return rslt;
@@ -342,7 +440,11 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
values[0] = m_impl.coeff(inputIndex);
for (int i = 1; i < PacketSize; ++i) {
- values[i] = coeffRowMajor(originalIndex+i);
+ if (innermostLoc + i < m_impl.dimensions()[NumDims-1]) {
+ values[i] = m_impl.coeff(inputIndex+i);
+ } else {
+ values[i] = coeffRowMajor(originalIndex+i);
+ }
}
PacketReturnType rslt = internal::pload<PacketReturnType>(values);
return rslt;