author     Eugene Zhulenev <ezhulenev@google.com>   2018-08-01 12:35:19 -0700
committer  Eugene Zhulenev <ezhulenev@google.com>   2018-08-01 12:35:19 -0700
commit     64abdf1d7eb17174f571751346dd0cbadcf3bc52
tree       a112affc194ca8a976e5bba18e46fa9fc9d2179a /unsupported/Eigen/CXX11/src/Tensor
parent     385b3ff12f1dd41a096908a0103873a768a8597d
Fix typo + get rid of redundant member variables for block sizes
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor')
6 files changed, 32 insertions, 42 deletions
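
All six files get the same treatment: the per-evaluator member m_block_total_size_max, computed eagerly in the constructor, is replaced by a value derived on demand inside getResourceRequirements(), and evaluators that previously lacked a device handle now store a const Device&. Below is a minimal, self-contained sketch of the before/after shape of the change; Device and the two evaluator structs are simplified stand-ins, not Eigen's real classes.

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>

// Stand-in for Eigen's Device types; only the cache-size query matters here.
struct Device {
  std::size_t lastLevelCacheSize() const { return 8 * 1024 * 1024; }
};

using Index = std::ptrdiff_t;  // Eigen::Index is likewise a signed type.

// Before: every evaluator cached the block-size cap as a member at
// construction time, even when block-based evaluation was never used.
struct EvaluatorBefore {
  explicit EvaluatorBefore(const Device& device)
      : m_block_total_size_max(std::max<Index>(
            1, device.lastLevelCacheSize() / sizeof(float))) {}
  Index blockTotalSizeMax() const { return m_block_total_size_max; }
  Index m_block_total_size_max;  // the redundant member this patch removes
};

// After: keep a reference to the device and derive the cap on demand;
// no extra state per evaluator.
struct EvaluatorAfter {
  explicit EvaluatorAfter(const Device& device) : m_device(device) {}
  Index blockTotalSizeMax() const {
    return std::max<Index>(1, m_device.lastLevelCacheSize() / sizeof(float));
  }
  const Device& m_device;  // must outlive the evaluator, as in Eigen
};

int main() {
  Device d;
  std::cout << EvaluatorBefore(d).blockTotalSizeMax() << "\n"
            << EvaluatorAfter(d).blockTotalSizeMax() << "\n";  // both 2097152
}
```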
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
index b6dbe5a22..cca14aafd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h
@@ -120,7 +120,7 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
   // Block based access to the XprType (input) tensor.
   using TensorBlock = internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>;
   using TensorBlockReader = internal::TensorBlockReader<ScalarNoConst, Index, NumDims, Layout>;
-  // We do block based broadcasting using a a trick with 2x tensor rank and 0
+  // We do block based broadcasting using a trick with 2x tensor rank and 0
   // strides. See block method implementation for details.
   using BroadcastDimensions = DSizes<Index, 2 * NumDims>;
   using BroadcastTensorBlock = internal::TensorBlock<ScalarNoConst, Index, 2 * NumDims, Layout>;
@@ -589,8 +589,8 @@ struct TensorEvaluator<const TensorBroadcastingOp<Broadcast, ArgType>, Device>
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
     // TODO(wuke): Targeting L1 size is 30% faster than targeting L{-1} on large
     // tensors. But this might need further tuning.
-    Index l1_cache_scalars = m_device.firstLevelCacheSize() / sizeof(Scalar);
-    Index block_total_size_max = numext::maxi(Index(1), l1_cache_scalars);
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.firstLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
         block_total_size_max));
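
The typo fix above sits next to a comment describing Eigen's block-broadcasting trick: double the tensor rank and give the broadcast dimensions a stride of 0, so that indexing along them keeps re-reading the same source data. Here is a toy illustration of the zero-stride idea; it is not the actual block() implementation.

```cpp
#include <array>
#include <cstddef>
#include <iostream>

int main() {
  // Source: a 1x3 row, logically broadcast to 4x3. A dimension of size 1
  // broadcast to size N can be addressed with stride 0: every index along
  // that dimension maps back to the same source element.
  std::array<float, 3> src = {10.f, 20.f, 30.f};
  const std::ptrdiff_t stride_row = 0;  // broadcast dimension: stride 0
  const std::ptrdiff_t stride_col = 1;
  for (std::ptrdiff_t i = 0; i < 4; ++i) {    // broadcast rows
    for (std::ptrdiff_t j = 0; j < 3; ++j) {  // real columns
      std::cout << src[i * stride_row + j * stride_col] << ' ';
    }
    std::cout << '\n';  // prints "10 20 30" four times
  }
}
```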
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
index 7579ab507..aca2ead12 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorChipping.h
@@ -202,9 +202,6 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
         m_inputStrides[i] = m_inputStrides[i + 1] * input_dims[i + 1];
       }
     }
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.lastLevelCacheSize() / sizeof(Scalar));
   }
 }
@@ -290,9 +287,11 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
-        m_block_total_size_max));
+        block_total_size_max));
     m_impl.getResourceRequirements(resources);
   }
@@ -370,13 +369,14 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
   {
     Index inputIndex;
     if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == 0) ||
-        (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims-1)) {
+        (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == NumInputDims - 1)) {
       // m_stride is equal to 1, so let's avoid the integer division.
       eigen_assert(m_stride == 1);
       inputIndex = index * m_inputStride + m_inputOffset;
-    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims-1) ||
-               (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
-      // m_stride is aways greater than index, so let's avoid the integer division.
+    } else if ((static_cast<int>(Layout) == static_cast<int>(ColMajor) && m_dim.actualDim() == NumInputDims - 1) ||
+               (static_cast<int>(Layout) == static_cast<int>(RowMajor) && m_dim.actualDim() == 0)) {
+      // m_stride is always greater than index, so let's avoid the integer
+      // division.
       eigen_assert(m_stride > index);
       inputIndex = index + m_inputOffset;
     } else {
@@ -392,7 +392,6 @@ struct TensorEvaluator<const TensorChippingOp<DimId, ArgType>, Device>
   Index m_stride;
   Index m_inputOffset;
   Index m_inputStride;
-  Index m_block_total_size_max;
   DSizes<Index, NumInputDims> m_inputStrides;
   TensorEvaluator<ArgType, Device> m_impl;
   const internal::DimensionId<DimId> m_dim;
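
For context on the reflowed branches in the @@ -370 hunk: the two fast paths correspond to chipping the innermost or the outermost dimension. The checks below verify those index formulas on a hypothetical column-major 3x2 matrix; the member names in the comments mirror the diff, but the concrete values are worked out just for this toy case.

```cpp
#include <cassert>
#include <cstddef>

// Flat index of (row, col) in a column-major rows x cols matrix.
std::ptrdiff_t flat(std::ptrdiff_t row, std::ptrdiff_t col,
                    std::ptrdiff_t rows) {
  return row + col * rows;
}

int main() {
  const std::ptrdiff_t rows = 3, cols = 2;

  // Chipping dim 0 at row k yields a length-`cols` vector; the evaluator's
  // fast path computes inputIndex = index * m_inputStride + m_inputOffset,
  // where here m_inputStride = rows and m_inputOffset = k (the m_stride == 1
  // branch).
  const std::ptrdiff_t k = 1;
  for (std::ptrdiff_t index = 0; index < cols; ++index)
    assert(index * rows + k == flat(k, index, rows));

  // Chipping the last dim at column c yields a length-`rows` vector; the
  // fast path is inputIndex = index + m_inputOffset, where here
  // m_inputOffset = c * rows (the m_stride > index branch, since
  // m_stride = rows and index < rows).
  const std::ptrdiff_t c = 1;
  for (std::ptrdiff_t index = 0; index < rows; ++index)
    assert(index + c * rows == flat(index, c, rows));
  return 0;
}
```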
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
index 39759b6c3..a8247be90 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h
@@ -259,7 +259,7 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
 #else
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
 #endif
-      : m_impl(op.expression(), device)
+      : m_device(device), m_impl(op.expression(), device)
 #ifdef EIGEN_USE_SYCL
       , m_op(op)
 #endif
@@ -404,9 +404,6 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
     } else {
       m_fastOutputDepth = internal::TensorIntDivisor<Index>(m_dimensions[NumDims-1]);
     }
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.lastLevelCacheSize() / sizeof(Scalar));
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -551,9 +548,11 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
-        m_block_total_size_max));
+        block_total_size_max));
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(
@@ -743,8 +742,8 @@ struct TensorEvaluator<const TensorImagePatchOp<Rows, Cols, ArgType>, Device>
   internal::TensorIntDivisor<Index> m_fastOutputDepth;

   Scalar m_paddingValue;
-  Index m_block_total_size_max;

+  const Device& m_device;
   TensorEvaluator<ArgType, Device> m_impl;
 #ifdef EIGEN_USE_SYCL
   // Required for SYCL in order to construct the expression tree on the device
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
index 2630311b8..6ddded0bd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMorphing.h
@@ -560,9 +560,6 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
         m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
       }
     }
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.lastLevelCacheSize() / sizeof(Scalar));
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -672,9 +669,11 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
-        m_block_total_size_max));
+        block_total_size_max));
     m_impl.getResourceRequirements(resources);
   }
@@ -761,7 +760,6 @@ struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
   Dimensions m_dimensions;
   bool m_is_identity;
   const StartIndices m_offsets;
-  Index m_block_total_size_max;
 };
@@ -1047,9 +1045,6 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
       m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
     }
   }
-  m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1),
-                                        device.lastLevelCacheSize() /
-                                        sizeof(Scalar));
 }

 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -1128,7 +1123,6 @@ struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
   DSizes<Index, NumDims> m_dimensions;
   DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
   const Strides m_strides;
-  std::size_t m_block_total_size_max;
   //use by sycl
   const StartIndices m_exprStartIndices;
   //use by sycl
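
Besides deleting the member, the TensorStridingSlicingOp hunk removes the one place that computed the maximum in std::size_t with a static_cast'ed 1. A sketch of the type issue the explicit template argument avoids, with std::max standing in for numext::maxi:

```cpp
#include <algorithm>
#include <cstddef>

using Index = std::ptrdiff_t;  // signed, like Eigen::Index

// Stand-in for Device::lastLevelCacheSize(), which reports bytes as size_t.
std::size_t lastLevelCacheSize() { return 8u * 1024u * 1024u; }

Index blockTotalSizeMax() {
  // std::max(1, lastLevelCacheSize() / sizeof(float));
  // would not compile: the two arguments deduce to int vs std::size_t.
  // Pinning the template argument converts both sides to one signed type,
  // which is what numext::maxi<Eigen::Index>(...) does in the patch and why
  // the old static_cast<std::size_t>(1) workaround can go away.
  return std::max<Index>(1, lastLevelCacheSize() / sizeof(float));
}

int main() { return blockTotalSizeMax() == (8 * 1024 * 1024) / 4 ? 0 : 1; }
```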
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
index c41783106..73675e7dd 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h
@@ -572,9 +572,6 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>
             : (static_cast<int>(Layout) == static_cast<int>(ColMajor))
                   ? m_preservedStrides[0]
                   : m_preservedStrides[NumOutputDims - 1];
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.lastLevelCacheSize() / sizeof(Scalar));
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -771,9 +768,11 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.lastLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
         internal::TensorBlockShapeType::kSkewedInnerDims,
-        m_block_total_size_max));
+        block_total_size_max));
     m_impl.getResourceRequirements(resources);
   }
@@ -1204,9 +1203,6 @@ struct TensorEvaluator<const TensorReductionOp<Op, Dims, ArgType, MakePointer_>, Device>
   // Indexed by reduced dimensions.
   array<Index, NumReducedDims> m_reducedDims;

-  // Block size for tiled (aka TensorBlock) evaluation.
-  Index m_block_total_size_max;
-
   // Evaluator for the input expression.
   TensorEvaluator<ArgType, Device> m_impl;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
index 77f47bf64..f94c1380d 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h
@@ -124,8 +124,11 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
   using TensorBlock = internal::TensorBlock<ScalarNoConst, Index, NumDims, Layout>;
   using TensorBlockReader = internal::TensorBlockReader<ScalarNoConst, Index, NumDims, Layout>;

-  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
-      : m_impl(op.expression(), device), m_shuffle(op.shufflePermutation())
+  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op,
+                                                        const Device& device)
+      : m_device(device),
+        m_impl(op.expression(), device),
+        m_shuffle(op.shufflePermutation())
   {
     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
     const Shuffle& shuffle = op.shufflePermutation();
@@ -162,9 +165,6 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
     for (int i = 0; i < NumDims; ++i) {
       m_inputStrides[i] = m_unshuffledInputStrides[shuffle[i]];
     }
-
-    m_block_total_size_max =
-        numext::maxi<Index>(1, device.firstLevelCacheSize() / sizeof(Scalar));
   }

   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
@@ -226,9 +226,10 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements(
       std::vector<internal::TensorOpResourceRequirements>* resources) const {
+    auto block_total_size_max = numext::maxi<Eigen::Index>(
+        1, m_device.firstLevelCacheSize() / sizeof(Scalar));
     resources->push_back(internal::TensorOpResourceRequirements(
-        internal::TensorBlockShapeType::kUniformAllDims,
-        m_block_total_size_max));
+        internal::TensorBlockShapeType::kUniformAllDims, block_total_size_max));
     m_impl.getResourceRequirements(resources);
   }
@@ -384,7 +385,8 @@ struct TensorEvaluator<const TensorShufflingOp<Shuffle, ArgType>, Device>
   array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
   array<Index, NumDims> m_inputStrides;
   array<Index, NumDims> m_unshuffledInputStrides;
-  Index m_block_total_size_max;
+
+  const Device& m_device;
   TensorEvaluator<ArgType, Device> m_impl;
   /// required by sycl
   Shuffle m_shuffle;
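
Worth noting: shuffling requests kUniformAllDims where the other evaluators request kSkewedInnerDims. The sketch below only mimics the apparent intent of the two strategies and is not Eigen's block-sizing algorithm: skewed blocks maximize the inner extent for sequential access, while uniform blocks keep extents balanced, which suits a shuffle where input and output iteration orders disagree.

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

using Index = std::ptrdiff_t;

// Illustrative only: split a total block-size budget across `dims` either
// skewed toward the inner (first) dimensions or uniformly across all of them.
std::vector<Index> blockDims(const std::vector<Index>& dims, Index budget,
                             bool skewed) {
  std::vector<Index> block(dims.size(), 1);
  if (skewed) {
    // Give the innermost dimension as much as possible, then move outward.
    Index remaining = budget;
    for (std::size_t i = 0; i < dims.size() && remaining > 1; ++i) {
      block[i] = std::min(dims[i], remaining);
      remaining /= block[i];
    }
  } else {
    // Grow all dimensions together until the budget is exhausted.
    bool grew = true;
    while (grew) {
      grew = false;
      for (std::size_t i = 0; i < dims.size(); ++i) {
        Index total = 1;
        for (Index b : block) total *= b;
        if (block[i] < dims[i] && total / block[i] * (block[i] + 1) <= budget) {
          ++block[i];
          grew = true;
        }
      }
    }
  }
  return block;
}

int main() {
  std::vector<Index> dims = {1024, 1024};
  for (bool skewed : {true, false}) {
    auto b = blockDims(dims, 4096, skewed);
    std::cout << (skewed ? "skewed:  " : "uniform: ") << b[0] << " x " << b[1]
              << "\n";  // skewed: 1024 x 4, uniform: 64 x 64
  }
}
```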