-rw-r--r--  unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h  70
-rw-r--r--  unsupported/test/cxx11_tensor_broadcasting.cpp           57
2 files changed, 123 insertions(+), 4 deletions(-)
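
This change teaches the broadcasting evaluator to recognize inputs whose first and last dimensions both have size 1 and that are broadcast only along those two outer dimensions (the NCHW-style case described in the new comment), and to route their vectorized loads through a new packetOneByNByOne() helper. A rough, self-contained sketch of an expression with that shape, assuming a per-channel bias added to activations stored as (batch, channels, flattened spatial); the names and sizes are illustrative and not taken from the patch:

#include <unsupported/Eigen/CXX11/Tensor>

int main() {
  const Eigen::Index N = 4, C = 16, HW = 28 * 28;

  // Activations laid out as (batch, channels, flattened spatial).
  Eigen::Tensor<float, 3> activations(N, C, HW);
  activations.setRandom();

  // Per-channel bias with size-1 outer dimensions: shape (1, C, 1),
  // i.e. the '[1, N..., 1]' input shape the patch special-cases.
  Eigen::Tensor<float, 3> bias(1, C, 1);
  bias.setRandom();

  // Broadcast factors of the form '[N, 1..., N]': replicate across batch
  // and spatial positions, leave the channel dimension untouched.
  Eigen::array<Eigen::Index, 3> bcast{{N, 1, HW}};
  Eigen::Tensor<float, 3> biased = activations + bias.broadcast(bcast);

  return biased.dimension(2) == HW ? 0 : 1;
}

Evaluating such an expression drives the broadcast through the evaluator's packet() path, which is where the new dispatch below takes effect.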
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index 9ab6b3565..b35b36475 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -161,6 +161,22 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
}
}
}
+
+    // Handle special formats like NCHW: the input shape is '[1, N..., 1]' and
+    // the broadcast shape is '[N, 1..., N]'.
+    if (!oneByN && !nByOne) {
+      if (input_dims[0] == 1 && input_dims[NumDims-1] == 1 && NumDims > 2) {
+        nByOne = true;
+        oneByN = true;
+        for (int i = 1; i < NumDims-1; ++i) {
+          if (broadcast[i] != 1) {
+            nByOne = false;
+            oneByN = false;
+            break;
+          }
+        }
+      }
+    }
}
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -256,18 +272,22 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
}
     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
-      if (oneByN) {
+      if (oneByN && !nByOne) {
         return packetNByOne<LoadMode>(index);
-      } else if (nByOne) {
+      } else if (!oneByN && nByOne) {
         return packetOneByN<LoadMode>(index);
+      } else if (oneByN && nByOne) {
+        return packetOneByNByOne<LoadMode>(index);
       } else {
         return packetColMajor<LoadMode>(index);
       }
     } else {
-      if (oneByN) {
+      if (oneByN && !nByOne) {
         return packetOneByN<LoadMode>(index);
-      } else if (nByOne) {
+      } else if (!oneByN && nByOne) {
         return packetNByOne<LoadMode>(index);
+      } else if (oneByN && nByOne) {
+        return packetOneByNByOne<LoadMode>(index);
       } else {
         return packetRowMajor<LoadMode>(index);
       }
     }
@@ -275,6 +295,48 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   }

   template<int LoadMode>
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByNByOne
+  (Index index) const
+  {
+    EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
+    eigen_assert(index+PacketSize-1 < dimensions().TotalSize());
+
+    EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[PacketSize];
+    Index startDim, endDim;
+    Index inputIndex, outputOffset, batchedIndex;
+
+    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
+      startDim = NumDims - 1;
+      endDim = 1;
+    } else {
+      startDim = 0;
+      endDim = NumDims - 2;
+    }
+
+    batchedIndex = index % m_outputStrides[startDim];
+    inputIndex = batchedIndex / m_outputStrides[endDim];
+    outputOffset = batchedIndex % m_outputStrides[endDim];
+
+    if (outputOffset + PacketSize <= m_outputStrides[endDim]) {
+      values[0] = m_impl.coeff(inputIndex);
+      return internal::pload1<PacketReturnType>(values);
+    } else {
+      for (int i = 0, cur = 0; i < PacketSize; ++i, ++cur) {
+        if (outputOffset + cur < m_outputStrides[endDim]) {
+          values[i] = m_impl.coeff(inputIndex);
+        } else {
+          ++inputIndex;
+          inputIndex = (inputIndex == m_inputStrides[startDim] ? 0 : inputIndex);
+          values[i] = m_impl.coeff(inputIndex);
+          outputOffset = 0;
+          cur = 0;
+        }
+      }
+      return internal::pload<PacketReturnType>(values);
+    }
+  }
+
+  template<int LoadMode>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packetOneByN(Index index) const
   {
     EIGEN_STATIC_ASSERT((PacketSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
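
As an aside, the index arithmetic in the new packetOneByNByOne() can be traced with a small scalar model. The sketch below is illustrative only (not Eigen code): it hard-codes the strides the evaluator would compute for a column-major broadcast of shape (1, n, 1) into (b0, n, b2) and prints which input coefficient each output index replicates.

#include <cstdio>

int main() {
  // Output shape (b0, n, b2) in column-major order; the input has shape
  // (1, n, 1), so it holds exactly n coefficients.
  const int b0 = 3, n = 4, b2 = 2;
  const int strideEnd = b0;        // plays the role of m_outputStrides[endDim]
  const int strideStart = b0 * n;  // plays the role of m_outputStrides[startDim]

  for (int index = 0; index < b0 * n * b2; ++index) {
    const int batched = index % strideStart;       // offset inside one outer slice
    const int inputIndex = batched / strideEnd;    // input coefficient to replicate
    const int outputOffset = batched % strideEnd;  // position along the inner broadcast dim
    std::printf("out[%2d] <- in[%d] (offset %d)\n", index, inputIndex, outputOffset);
  }
  return 0;
}

When outputOffset plus the packet size stays within strideEnd, every lane of the packet replicates the same input coefficient, which is why the fast path can use pload1; otherwise the loop in the patch fills the packet lane by lane, wrapping the input index when it runs past the last input coefficient.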
diff --git a/unsupported/test/cxx11_tensor_broadcasting.cpp b/unsupported/test/cxx11_tensor_broadcasting.cpp
index a9d268ea6..f0ff03184 100644
--- a/unsupported/test/cxx11_tensor_broadcasting.cpp
+++ b/unsupported/test/cxx11_tensor_broadcasting.cpp
@@ -238,6 +238,59 @@ static void test_simple_broadcasting_n_by_one()
}
}
+template <int DataLayout>
+static void test_simple_broadcasting_one_by_n_by_one_1d()
+{
+  Tensor<float, 3, DataLayout> tensor(1,7,1);
+  tensor.setRandom();
+  array<ptrdiff_t, 3> broadcasts;
+  broadcasts[0] = 5;
+  broadcasts[1] = 1;
+  broadcasts[2] = 13;
+  Tensor<float, 3, DataLayout> broadcasted;
+  broadcasted = tensor.broadcast(broadcasts);
+
+  VERIFY_IS_EQUAL(broadcasted.dimension(0), 5);
+  VERIFY_IS_EQUAL(broadcasted.dimension(1), 7);
+  VERIFY_IS_EQUAL(broadcasted.dimension(2), 13);
+
+  for (int i = 0; i < 5; ++i) {
+    for (int j = 0; j < 7; ++j) {
+      for (int k = 0; k < 13; ++k) {
+        VERIFY_IS_EQUAL(tensor(0,j%7,0), broadcasted(i,j,k));
+      }
+    }
+  }
+}
+
+template <int DataLayout>
+static void test_simple_broadcasting_one_by_n_by_one_2d()
+{
+  Tensor<float, 4, DataLayout> tensor(1,7,13,1);
+  tensor.setRandom();
+  array<ptrdiff_t, 4> broadcasts;
+  broadcasts[0] = 5;
+  broadcasts[1] = 1;
+  broadcasts[2] = 1;
+  broadcasts[3] = 19;
+  Tensor<float, 4, DataLayout> broadcast;
+  broadcast = tensor.broadcast(broadcasts);
+
+  VERIFY_IS_EQUAL(broadcast.dimension(0), 5);
+  VERIFY_IS_EQUAL(broadcast.dimension(1), 7);
+  VERIFY_IS_EQUAL(broadcast.dimension(2), 13);
+  VERIFY_IS_EQUAL(broadcast.dimension(3), 19);
+
+  for (int i = 0; i < 5; ++i) {
+    for (int j = 0; j < 7; ++j) {
+      for (int k = 0; k < 13; ++k) {
+        for (int l = 0; l < 19; ++l) {
+          VERIFY_IS_EQUAL(tensor(0,j%7,k%13,0), broadcast(i,j,k,l));
+        }
+      }
+    }
+  }
+}
void test_cxx11_tensor_broadcasting()
{
@@ -253,4 +306,8 @@ void test_cxx11_tensor_broadcasting()
   CALL_SUBTEST(test_simple_broadcasting_n_by_one<RowMajor>());
   CALL_SUBTEST(test_simple_broadcasting_one_by_n<ColMajor>());
   CALL_SUBTEST(test_simple_broadcasting_n_by_one<ColMajor>());
+  CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_1d<ColMajor>());
+  CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_2d<ColMajor>());
+  CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_1d<RowMajor>());
+  CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_2d<RowMajor>());
}