From 64abdf1d7eb17174f571751346dd0cbadcf3bc52 Mon Sep 17 00:00:00 2001
From: Eugene Zhulenev <ezhulenev@google.com>
Date: Wed, 1 Aug 2018 12:35:19 -0700
Subject: Fix typo + get rid of redundant member variables for block sizes

---
 .../Eigen/CXX11/src/Tensor/TensorBroadcasting.h       |  6 +++---
 unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h   | 17 ++++++++---------
 unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h | 11 +++++------
 unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h   | 12 +++---------
 unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h  | 10 +++-------
 unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h  | 18 ++++++++++--------
 6 files changed, 32 insertions(+), 42 deletions(-)

diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index b6dbe5a22..cca14aafd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -120,7 +120,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   // Block based access to the XprType (input) tensor.
   using TensorBlock = internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>;
   using TensorBlockReader = internal::TensorBlockReader<ScalarNoConst, Index, NumDims, Layout>;
-  // We do block based broadcasting using a a trick with 2x tensor rank and 0
+  // We do block based broadcasting using a trick with 2x tensor rank and 0
   // strides. See block method implementation for details.
   using BroadcastDimensions = DSizes<Index, 2 * NumDims>;
   using BroadcastTensorBlock = internal::TensorBlock<ScalarNoConst, Index, 2 * NumDims, Layout>;
@@ -589,8 +589,8 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
     // TODO(wuke): Targeting L1 size is 30% faster than targeting L{-1} on large
     // tensors. But this might need further tuning.
-    Index l1_cache_scalars = m_device.firstLevelCacheSize() / sizeof(Scalar);
-    Index block_total_size_max = numext::maxi(Index(1), l1_cache_scalars);
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.firstLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
         block_total_size_max));
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
index 7579ab507..aca2ead12 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
@@ -202,9 +202,6 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
           m_inputStrides[i] = m_inputStrides[i + 1] * input_dims[i + 1];
         }
       }
-
-      m_block_total_size_max =
-          numext::maxi<Index>(1, device.lastLevelCacheSize() / sizeof(Scalar));
     }
   }
 
@@ -290,9 +287,11 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
-        m_block_total_size_max));
+        block_total_size_max));
     m_impl.getResourceRequirements(resources);
   }
 
@@ -370,13 +369,14 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
   {
     Index inputIndex;
     if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) ||
-        (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) {
+        (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims - 1)) {
       // m_stride is equal to 1, so let's avoid the integer division.
       eigen_assert(m_stride == 1);
       inputIndex = index * m_inputStride + m_inputOffset;
-    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims-1) ||
-               (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
-      // m_stride is aways greater than index, so let's avoid the integer division.
+    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims - 1) ||
+               (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
+      // m_stride is aways greater than index, so let's avoid the integer
+      // division.
       eigen_assert(m_stride > index);
       inputIndex = index + m_inputOffset;
     } else {
@@ -392,7 +392,6 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
   Index m_stride;
   Index m_inputOffset;
   Index m_inputStride;
-  Index m_block_total_size_max;
   DSizes<Index, NumInputDims> m_inputStrides;
   TensorEvaluator<ArgType, Device> m_impl;
   const internal::DimensionId<DimId> m_dim;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
index 39759b6c3..a8247be90 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
@@ -259,7 +259,7 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
 #else
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator( const XprType& op, const Device& device)
 #endif
-      : m_impl(op.expression(), device)
+      : m_device(device), m_impl(op.expression(), device)
 #ifdef EIGEN_USE_SYCL
       , m_op(op)
 #endif
@@ -404,9 +404,6 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
     } else {
       m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]);
     }
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.lastLevelCacheSize() / sizeof(Scalar));
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -551,9 +548,11 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
      std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
-        m_block_total_size_max));
+        block_total_size_max));
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(
@@ -743,8 +742,8 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
   internal::TensorIntDivisor<Index> m_fastOutputDepth;
 
   Scalar m_paddingValue;
-  Index m_block_total_size_max;
 
+  const Device& m_device;
   TensorEvaluator<ArgType, Device> m_impl;
 #ifdef EIGEN_USE_SYCL
   // Required for SYCL in order to construct the expression tree on the device
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index 2630311b8..6ddded0bd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -560,9 +560,6 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
       }
     }
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.lastLevelCacheSize() / sizeof(Scalar));
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -672,9 +669,11 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
-        m_block_total_size_max));
+        block_total_size_max));
     m_impl.getResourceRequirements(resources);
   }
 
@@ -761,7 +760,6 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Devi
   Dimensions m_dimensions;
   bool m_is_identity;
   const StartIndices m_offsets;
-  Index m_block_total_size_max;
 };
 
 
@@ -1047,9 +1045,6 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
       }
     }
-    m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1),
-                                          device.lastLevelCacheSize() /
-                                          sizeof(Scalar));
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -1128,7 +1123,6 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices,
   DSizes<Index, NumDims> m_dimensions;
   DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
   const Strides m_strides;
-  std::size_t m_block_total_size_max; //use by sycl
   const StartIndices m_exprStartIndices; //use by sycl
   const StopIndices m_exprStopIndices; //use by sycl
 };
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index c41783106..73675e7dd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -572,9 +572,6 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
             : (static_cast<int>(Layout) == static_cast<int>(ColMajor))
                   ? m_preservedStrides[0]
                   : m_preservedStrides[NumOutputDims - 1];
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.lastLevelCacheSize() / sizeof(Scalar));
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -771,9 +768,11 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
-        m_block_total_size_max));
+        block_total_size_max));
     m_impl.getResourceRequirements(resources);
   }
 
@@ -1204,9 +1203,6 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>,
   // Indexed by reduced dimensions.
   array<Index, NumReducedDims> m_reducedDims;
 
-  // Block size for tiled (aka TensorBlock) evaluation.
-  Index m_block_total_size_max;
-
   // Evaluator for the input expression.
   TensorEvaluator<ArgType, Device> m_impl;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
index 77f47bf64..f94c1380d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
@@ -124,8 +124,11 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
   using TensorBlock = internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>;
   using TensorBlockReader = internal::TensorBlockReader<ScalarNoConst, Index, NumDims, Layout>;
 
-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device), m_shuffle(op.shufflePermutation())
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
+                                                        const Device& device)
+      : m_device(device),
+        m_impl(op.expression(), device),
+        m_shuffle(op.shufflePermutation())
   {
     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
     const Shuffle& shuffle = op.shufflePermutation();
@@ -162,9 +165,6 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
     for (int i = 0; i < NumDims; ++i) {
       m_inputStrides[i] = m_unshuffledInputStrides[shuffle[i]];
     }
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.firstLevelCacheSize() / sizeof(Scalar));
   }
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -226,9 +226,10 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
 
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
      std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.firstLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
-        internal::TensorBlockShapeType::kUniformAllDims,
-        m_block_total_size_max));
+        internal::TensorBlockShapeType::kUniformAllDims, block_total_size_max));
     m_impl.getResourceRequirements(resources);
   }
 
@@ -384,7 +385,8 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
   array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
   array<Index, NumDims> m_inputStrides;
   array<Index, NumDims> m_unshuffledInputStrides;
-  Index m_block_total_size_max;
+
+  const Device& m_device;
   TensorEvaluator<ArgType, Device> m_impl;
   /// required by sycl
   Shuffle m_shuffle;
--
cgit v1.2.3
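
Every hunk above applies the same two-part pattern: an m_block_total_size_max member that was computed eagerly in the constructor is dropped, and the block-size cap is recomputed from the device's cache size at the single place that consumes it, getResourceRequirements(); evaluators that did not already hold a device reference (TensorImagePatch, TensorShuffling) gain a const Device& m_device member instead. The following is a minimal standalone sketch of that pattern, not Eigen code: Device, ResourceRequirements, and Evaluator here are simplified mock stand-ins, and std::max plays the role of numext::maxi.

// block_size_sketch.cc -- illustration only, using mock types.
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

using Index = std::ptrdiff_t;

// Mock device: models only the cache-size query used by the pattern.
struct Device {
  std::size_t lastLevelCacheSize() const { return 8 * 1024 * 1024; }
};

// Mock stand-in for internal::TensorOpResourceRequirements.
struct ResourceRequirements {
  Index block_total_size_max;
};

template <typename Scalar>
class Evaluator {
 public:
  explicit Evaluator(const Device& device) : m_device(device) {
    // Before the patch, the cap was computed here and cached in a member:
    //   m_block_total_size_max =
    //       numext::maxi<Index>(1, device.lastLevelCacheSize() / sizeof(Scalar));
    // After the patch there is no such member; see getResourceRequirements().
  }

  void getResourceRequirements(std::vector<ResourceRequirements>* resources) const {
    // Derived on demand at the only call site that needs it. The explicit
    // <Index> argument keeps the comparison in one signed type, since
    // lastLevelCacheSize() is unsigned while the lower bound is a literal 1.
    const Index block_total_size_max = std::max<Index>(
        1, static_cast<Index>(m_device.lastLevelCacheSize() / sizeof(Scalar)));
    resources->push_back({block_total_size_max});
  }

 private:
  const Device& m_device;  // all the state the computation actually needs
};

int main() {
  Device device;
  Evaluator<float> eval(device);
  std::vector<ResourceRequirements> resources;
  eval.getResourceRequirements(&resources);
  // 8 MiB of last-level cache / 4-byte floats = 2097152 scalars per block.
  std::cout << resources[0].block_total_size_max << "\n";
}

Keeping the computation next to its only use removes per-evaluator state, and the constructor no longer pays for a value that is irrelevant unless block-based evaluation is actually requested.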