diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-04-29 13:41:26 -0700 |
---|---|---|
committer | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-04-29 13:41:26 -0700 |
commit | 07a247dcf4e86f9f741b68e1d8e0897de3eeca57 (patch) | |
tree | d103bd20faa1f103035bac2f21507ecc65f97f68 /unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h | |
parent | fa5a8f055aebbf4f39fca26e857351103fab4d11 (diff) | |
parent | 0f3c4c8ff4a6635db77195a8919c743f34181cc2 (diff) |
Pulled latest updates from upstream
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h | 30 |
1 files changed, 19 insertions, 11 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h index 0bf460f4e..a87e45330 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorPatch.h @@ -85,6 +85,10 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device> static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value + 1; typedef DSizes<Index, NumDims> Dimensions; typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; + static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size; + enum { IsAligned = false, @@ -137,9 +141,6 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device> } } - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* /*data*/) { @@ -183,12 +184,11 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device> template<int LoadMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { - const int packetSize = internal::unpacket_traits<PacketReturnType>::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(index+packetSize-1 < dimensions().TotalSize()); + EIGEN_STATIC_ASSERT(PacketSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(index+PacketSize-1 < dimensions().TotalSize()); Index output_stride_index = (static_cast<int>(Layout) == static_cast<int>(ColMajor)) ? NumDims - 1 : 0; - Index indices[2] = {index, index + packetSize - 1}; + Index indices[2] = {index, index + PacketSize - 1}; Index patchIndices[2] = {indices[0] / m_outputStrides[output_stride_index], indices[1] / m_outputStrides[output_stride_index]}; Index patchOffsets[2] = {indices[0] - patchIndices[0] * m_outputStrides[output_stride_index], @@ -229,15 +229,15 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device> inputIndices[0] += (patchIndices[0] + patchOffsets[0]); inputIndices[1] += (patchIndices[1] + patchOffsets[1]); - if (inputIndices[1] - inputIndices[0] == packetSize - 1) { + if (inputIndices[1] - inputIndices[0] == PacketSize - 1) { PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]); return rslt; } else { - EIGEN_ALIGN_MAX CoeffReturnType values[packetSize]; + EIGEN_ALIGN_MAX CoeffReturnType values[PacketSize]; values[0] = m_impl.coeff(inputIndices[0]); - values[packetSize-1] = m_impl.coeff(inputIndices[1]); - for (int i = 1; i < packetSize-1; ++i) { + values[PacketSize-1] = m_impl.coeff(inputIndices[1]); + for (int i = 1; i < PacketSize-1; ++i) { values[i] = coeff(index+i); } PacketReturnType rslt = internal::pload<PacketReturnType>(values); @@ -245,6 +245,14 @@ struct TensorEvaluator<const TensorPatchOp<PatchDim, ArgType>, Device> } } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const { + const double compute_cost = NumDims * (TensorOpCost::DivCost<Index>() + + TensorOpCost::MulCost<Index>() + + 2 * TensorOpCost::AddCost<Index>()); + return m_impl.costPerCoeff(vectorized) + + TensorOpCost(0, 0, compute_cost, vectorized, PacketSize); + } + EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } protected: |