aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
diff options
context:
space:
mode:
authorGravatar Rasmus Munk Larsen <rmlarsen@google.com>2018-07-13 16:12:38 -0700
committerGravatar Rasmus Munk Larsen <rmlarsen@google.com>2018-07-13 16:12:38 -0700
commit4222550e173c344ef60e14cb2a18fc0bf8968a51 (patch)
treedb51cdc22a58352eccfed9794653b96abb62506b /unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
parent4a3952fd55a213baefeeebcae50366226952acce (diff)
Optimize the case where broadcasting is a no-op.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h30
1 files changed, 23 insertions, 7 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index b35b36475..3b934a5fd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -105,7 +105,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
typedef typename XprType::CoeffReturnType CoeffReturnType;
typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
- bool nByOne = false, oneByN = false;
+ bool isCopy= false, nByOne = false, oneByN = false;
enum {
IsAligned = true,
@@ -123,9 +123,13 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
EIGEN_STATIC_ASSERT((NumDims > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
const InputDimensions& input_dims = m_impl.dimensions();
const Broadcast& broadcast = op.broadcast();
+ isCopy = true;
for (int i = 0; i < NumDims; ++i) {
eigen_assert(input_dims[i] > 0);
- m_dimensions[i] = input_dims[i] * broadcast[i];
+ m_dimensions[i] = input_dims[i] * m_broadcast[i];
+ if (m_broadcast[i] != 1) {
+ isCopy = false;
+ }
}
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
@@ -197,9 +201,17 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
}
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- return coeffColMajor(index);
+ if (isCopy) {
+ return m_impl.coeff(index);
+ } else {
+ return coeffColMajor(index);
+ }
} else {
- return coeffRowMajor(index);
+ if (isCopy) {
+ return m_impl.coeff(index);
+ } else {
+ return coeffRowMajor(index);
+ }
}
}
@@ -272,7 +284,9 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
}
if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- if (oneByN && !nByOne) {
+ if (isCopy) {
+ return m_impl.template packet<LoadMode>(index);
+ } else if (oneByN && !nByOne) {
return packetNByOne<LoadMode>(index);
} else if (!oneByN && nByOne) {
return packetOneByN<LoadMode>(index);
@@ -282,7 +296,9 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
return packetColMajor<LoadMode>(index);
}
} else {
- if (oneByN && !nByOne) {
+ if (isCopy) {
+ return m_impl.template packet<LoadMode>(index);
+ } else if (oneByN && !nByOne) {
return packetOneByN<LoadMode>(index);
} else if (!oneByN && nByOne) {
return packetNByOne<LoadMode>(index);
@@ -516,7 +532,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
costPerCoeff(bool vectorized) const {
double compute_cost = TensorOpCost::AddCost<Index>();
- if (NumDims > 0) {
+ if (!isCopy && NumDims > 0) {
for (int i = NumDims - 1; i > 0; --i) {
compute_cost += TensorOpCost::DivCost<Index>();
if (internal::index_statically_eq<Broadcast>(i, 1)) {