From eeabf7975e59b47f4e3677c340013ebbfcfbc2bd Mon Sep 17 00:00:00 2001
From: Benoit Steiner
Date: Wed, 12 Nov 2014 22:35:44 -0800
Subject: Optimized broadcasting

---
 .../Eigen/CXX11/src/Tensor/TensorBroadcasting.h | 36 +++++++++++++++++-----
 1 file changed, 29 insertions(+), 7 deletions(-)

(limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h')

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index 2bd158dac..a77903dca 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -24,11 +24,13 @@ template<typename Broadcast, typename XprType>
 struct traits<TensorBroadcastingOp<Broadcast, XprType> > : public traits<XprType>
 {
   typedef typename XprType::Scalar Scalar;
-  typedef typename internal::packet_traits<Scalar>::type Packet;
-  typedef typename traits<XprType>::StorageKind StorageKind;
-  typedef typename traits<XprType>::Index Index;
+  typedef traits<XprType> XprTraits;
+  typedef typename packet_traits<Scalar>::type Packet;
+  typedef typename XprTraits::StorageKind StorageKind;
+  typedef typename XprTraits::Index Index;
   typedef typename XprType::Nested Nested;
   typedef typename remove_reference<Nested>::type _Nested;
+  static const int NumDimensions = XprTraits::NumDimensions;
 };
 
 template<typename Broadcast, typename XprType>
@@ -85,6 +87,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
   typedef DSizes<Index, NumDims> Dimensions;
   typedef typename XprType::Scalar Scalar;
+  typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
 
   enum {
     IsAligned = false,
@@ -129,10 +132,19 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
     Index inputIndex = 0;
     for (int i = NumDims - 1; i > 0; --i) {
       const Index idx = index / m_outputStrides[i];
-      inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
+      if (internal::index_statically_eq<InputDimensions>()(i, 1)) {
+        eigen_assert(idx % m_impl.dimensions()[i] == 0);
+      } else {
+        inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
+      }
       index -= idx * m_outputStrides[i];
     }
-    inputIndex += (index % m_impl.dimensions()[0]);
+    if (internal::index_statically_eq<Broadcast>()(0, 1)) {
+      eigen_assert(index < m_impl.dimensions()[0]);
+      inputIndex += index;
+    } else {
+      inputIndex += (index % m_impl.dimensions()[0]);
+    }
     return m_impl.coeff(inputIndex);
   }
 
@@ -150,10 +162,20 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
     Index inputIndex = 0;
     for (int i = NumDims - 1; i > 0; --i) {
       const Index idx = index / m_outputStrides[i];
-      inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
+      if (internal::index_statically_eq<InputDimensions>()(i, 1)) {
+        eigen_assert(idx % m_impl.dimensions()[i] == 0);
+      } else {
+        inputIndex += (idx % m_impl.dimensions()[i]) * m_inputStrides[i];
+      }
       index -= idx * m_outputStrides[i];
     }
-    const Index innermostLoc = index % m_impl.dimensions()[0];
+    Index innermostLoc;
+    if (internal::index_statically_eq<Broadcast>()(0, 1)) {
+      eigen_assert(index < m_impl.dimensions()[0]);
+      innermostLoc = index;
+    } else {
+      innermostLoc = index % m_impl.dimensions()[0];
+    }
     inputIndex += innermostLoc;
 
     // Todo: this could be extended to the second dimension if we're not
-- 
cgit v1.2.3