/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_
#define TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/kernels/eigen_volume_patch.h"

namespace Eigen {

/** SpatialMaxPooling
 * \ingroup CXX11_NeuralNetworks_Module
 *
 * \brief Applies a max-pooling over a multichannel input image.
 *
 * The input parameter is expected to be a tensor with a rank of 4 (channels,
 * height, width, others in col-major, and the reverse of that in row-major).
 *
 * The result can be assigned to a tensor of rank equal to the rank of the
 * input. The dimensions of the result will be channels, height, width, and
 * others (in col-major, and the reverse of that if the input was row-major).
 *
 * The order of the width and height dimensions can be swapped if needed.
 *
 */
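// Example usage (an illustrative sketch; the tensor sizes below are arbitrary
// assumptions, not part of the upstream API):
//
//   Eigen::Tensor<float, 4> input(3, 28, 28, 16);  // channels, rows, cols, batch
//   input.setRandom();
//   Eigen::Tensor<float, 4> pooled =
//       Eigen::SpatialMaxPooling(input, 3, 3, 2, 2, Eigen::PADDING_VALID);
//   // With VALID padding the output extent is divup(28 - 3 + 1, 2) = 13,
//   // so pooled has dimensions (3, 13, 13, 16).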
#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        const Eigen::array<int, 2>,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        typename internal::conditional<
            internal::traits<Input>::Layout == ColMajor,
            const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
            const Eigen::IndexList<Eigen::type2index<2>,
                                   Eigen::type2index<3> > >::type,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#endif
SpatialMaxPooling(const Input& input, DenseIndex patchRows,
                  DenseIndex patchCols, DenseIndex strideRows,
                  DenseIndex strideCols, const PaddingType padding_type,
                  DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) {
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);

  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  const DenseIndex patchRowsEff =
      patchRows + (patchRows - 1) * (in_strideRows - 1);
  const DenseIndex patchColsEff =
      patchCols + (patchCols - 1) * (in_strideCols - 1);

  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
  static const int idxRows = isColMajor ? 1 : 2;
  static const int idxCols = isColMajor ? 2 : 1;

  // Molds the output of the reduction into the shape expected by the user
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output height
  // - 3rd dim: output width
  // - 4th dim and beyond: everything else including batch size
  Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRowsEff + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchColsEff + 1,
        strideCols);
  } else {
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[3] = in.dimension(3);

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 2> reduction_dims;
  if (isColMajor) {
    reduction_dims[0] = 1;
    reduction_dims[1] = 2;
  } else {
    reduction_dims[0] = 2;
    reduction_dims[1] = 3;
  }
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  typename internal::conditional<
      internal::traits<Input>::Layout == ColMajor,
      const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
      const Eigen::IndexList<Eigen::type2index<2>,
                             Eigen::type2index<3> > >::type reduction_dims;
#endif

  return input
      .extract_image_patches(
          patchRows, patchCols, strideRows, strideCols, in_strideRows,
          in_strideCols, padding_type,
          -Eigen::NumTraits<typename internal::remove_const<
              typename internal::traits<Input>::Scalar>::type>::highest())
      .maximum(reduction_dims)
      .reshape(post_reduce_dims);
}

/** CuboidMaxPooling
 * \ingroup CXX11_NeuralNetworks_Module
 *
 * \brief Applies a max-pooling over a multichannel input volume.
 *
 * The input parameter is expected to be a tensor with a rank of 5 (channels,
 * depth, height, width, others in col-major, and the reverse of that in
 * row-major).
 *
 * The result can be assigned to a tensor of rank equal to the rank of the
 * input. The dimensions of the result will be channels, depth, height, width,
 * and others (in col-major, and the reverse of that if the input was
 * row-major).
 *
 * The order of the depth, width and height dimensions can be swapped if
 * needed.
 *
 */
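// Example usage (an illustrative sketch; the tensor sizes below are arbitrary
// assumptions, not part of the upstream API):
//
//   Eigen::Tensor<float, 5> input(3, 16, 16, 16, 8);  // channels, planes, rows, cols, batch
//   input.setRandom();
//   Eigen::Tensor<float, 5> pooled = Eigen::CuboidMaxPooling(
//       input, 2, 2, 2, 2, 2, 2, Eigen::PADDING_VALID);
//   // With VALID padding each pooled extent is divup(16 - 2 + 1, 2) = 8,
//   // so pooled has dimensions (3, 8, 8, 8, 8).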
#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<float>, const Eigen::array<int, 1>,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<float>,
        const Eigen::IndexList<Eigen::type2index<1> >,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
#endif
CuboidMaxPooling(const Input& input, DenseIndex patchPlanes,
                 DenseIndex patchRows, DenseIndex patchCols,
                 DenseIndex stridePlanes, DenseIndex strideRows,
                 DenseIndex strideCols, const PaddingType padding_type) {
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);
  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);

  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  static const int idxPlanes = isColMajor ? 1 : 3;
  static const int idxRows = 2;
  static const int idxCols = isColMajor ? 3 : 1;

  // Molds the output of the reduction into the shape expected by the user
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output depth
  // - 3rd dim: output height
  // - 4th dim: output width
  // - 5th dim and beyond: everything else including batch size
  Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)) - patchPlanes + 1,
        stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRows + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchCols + 1,
        strideCols);
  } else {
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)), stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[4] = in.dimension(4);

  Eigen::DSizes<DenseIndex, 3> pre_reduce_dims;
  pre_reduce_dims[1] = patchRows * patchCols * patchPlanes;
  if (isColMajor) {
    pre_reduce_dims[0] = post_reduce_dims[0];
    pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] *
                         post_reduce_dims[3] * post_reduce_dims[4];
  } else {
    pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] *
                         post_reduce_dims[2] * post_reduce_dims[3];
    pre_reduce_dims[2] = post_reduce_dims[4];
  }

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 1> reduction_dims;
  reduction_dims[0] = 1;
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  Eigen::IndexList<Eigen::type2index<1> > reduction_dims;
#endif
  return input
      .extract_volume_patches(patchPlanes, patchRows, patchCols, stridePlanes,
                              strideRows, strideCols, padding_type,
                              -Eigen::NumTraits<float>::highest())
      .reshape(pre_reduce_dims)
      .maximum(reduction_dims)
      .reshape(post_reduce_dims);
}

/** SpatialAvgPooling
 * \ingroup CXX11_NeuralNetworks_Module
 *
 * \brief Applies an average pooling over a multichannel input image.
 *
 * The input parameter is expected to be a tensor with a rank of 4 (channels,
 * height, width, others in col-major, and the reverse of that in row-major).
 *
 * The result can be assigned to a tensor of rank equal to the rank of the
 * input. The dimensions of the result will be channels, height, width, and
 * others (in col-major, and the reverse of that if the input was row-major).
 *
 * The order of the width and height dimensions can be swapped if needed.
 *
 */
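// Example usage (an illustrative sketch; the tensor sizes below are arbitrary
// assumptions, not part of the upstream API):
//
//   Eigen::Tensor<float, 4> input(3, 28, 28, 16);  // channels, rows, cols, batch
//   input.setRandom();
//   Eigen::Tensor<float, 4> pooled =
//       Eigen::SpatialAvgPooling(input, 2, 2, 2, 2, Eigen::PADDING_SAME);
//   // With SAME padding the output extent is divup(28, 2) = 14, so pooled
//   // has dimensions (3, 14, 14, 16). Border patches average only the
//   // in-bounds values, courtesy of the AvgPoolMeanReducer defined below.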
namespace internal {

template <typename T>
struct AvgPoolMeanReducer {
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)
  // We only support packet access for floats.
  static const bool PacketAccess = internal::is_same<T, float>::value;
#else
  static const bool PacketAccess = false;
#endif
  static const bool IsStateful = true;

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE AvgPoolMeanReducer() : scalarCount_(0) {
    typedef typename packet_traits<T>::type Packet;
    packetCount_ = pset1<Packet>(T(0.0));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
    if (t != -Eigen::NumTraits<T>::highest()) {
      (*accum) = (*accum) + t;
      scalarCount_++;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return static_cast<T>(0);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    eigen_assert(scalarCount_ > 0);
    return accum / T(scalarCount_);
  }

#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)
#ifdef EIGEN_VECTORIZE_AVX512
#define pequal(a, b)   \
  _mm512_castsi512_ps( \
      _mm512_maskz_set1_epi32(_mm512_cmp_ps_mask(a, b, _CMP_EQ_UQ), -1))

  // The ternarylogic function immediate determines the values in the result.
  // In the case below, 0xd8 implies (false_mask) ? (b) : (a)
  // For details, refer to the vpternlogd instruction table at
  // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-2c-manual.pdf

#define psel(a, b, false_mask)                        \
  _mm512_castsi512_ps(_mm512_ternarylogic_epi32(      \
      _mm512_castps_si512(a), _mm512_castps_si512(b), \
      _mm512_castps_si512(false_mask), 0xd8))
#elif defined EIGEN_VECTORIZE_AVX
#define pequal(a, b) _mm256_cmp_ps(a, b, _CMP_EQ_UQ)
#define psel(a, b, false_mask) _mm256_blendv_ps(a, b, false_mask)
#else
#define pequal(a, b) _mm_cmpeq_ps(a, b)
#define psel(a, b, false_mask) \
  _mm_or_ps(_mm_andnot_ps(false_mask, a), _mm_and_ps(false_mask, b))
#endif

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p,
                                                          Packet* accum) {
    reducePacketWithType(static_cast<T>(0), p, accum);
  }

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacketWithType(
      T, const Packet& p, Packet* accum) {
    Packet skip_mask =
        pequal(p, pset1<Packet>(-Eigen::NumTraits<T>::highest()));
    (*accum) = padd<Packet>(*accum, psel(p, pset1<Packet>(0), skip_mask));
    packetCount_ = padd<Packet>(
        packetCount_, psel(pset1<Packet>(1), pset1<Packet>(0), skip_mask));
  }

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(0);
  }

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
  finalizePacket(const Packet& vaccum) const {
    return pdiv(vaccum, packetCount_);
  }

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T
  finalizeBoth(const T saccum, const Packet& vaccum) const {
    return (saccum + predux(vaccum)) / (scalarCount_ + predux(packetCount_));
  }
#endif

 protected:
  typedef typename packet_traits<T>::type Packet;
  int scalarCount_;
  Packet packetCount_;
};
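// Worked illustration (not from the upstream source): with SAME padding,
// out-of-range patch entries are filled with -NumTraits<T>::highest(), which
// the reducer above treats as missing. Averaging a 2x2 border patch whose
// values are {1, 2, -highest, -highest} accumulates 1 + 2 with
// scalarCount_ == 2 and finalizes to 1.5, rather than the 0.75 a plain mean
// over all four entries would produce.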
template <typename T, typename Device>
struct reducer_traits<AvgPoolMeanReducer<T>, Device> {
  enum {
    Cost = 1,
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)
    // We only support packet access for floats.
    PacketAccess = true,
#else
    PacketAccess = false,
#endif
    IsStateful = true,
    IsExactlyAssociative = false
  };
};

template <>
struct reducer_traits<AvgPoolMeanReducer<float>, GpuDevice> {
  enum {
    Cost = 1,
    PacketAccess = false,
    IsStateful = true,
    IsExactlyAssociative = false
  };
};

}  // namespace internal

#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        const Eigen::array<int, 2>,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        typename internal::conditional<
            internal::traits<Input>::Layout == ColMajor,
            const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
            const Eigen::IndexList<Eigen::type2index<2>,
                                   Eigen::type2index<3> > >::type,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#endif
SpatialAvgPooling(const Input& input, DenseIndex patchRows,
                  DenseIndex patchCols, DenseIndex strideRows,
                  DenseIndex strideCols, const PaddingType padding_type,
                  DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) {
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);

  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  const DenseIndex patchRowsEff =
      patchRows + (patchRows - 1) * (in_strideRows - 1);
  const DenseIndex patchColsEff =
      patchCols + (patchCols - 1) * (in_strideCols - 1);

  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
  static const int idxRows = isColMajor ? 1 : 2;
  static const int idxCols = isColMajor ? 2 : 1;

  // Molds the output of the reduction into the shape expected by the user
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output height
  // - 3rd dim: output width
  // - 4th dim and beyond: everything else including batch size
  Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRowsEff + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchColsEff + 1,
        strideCols);
  } else {
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[3] = in.dimension(3);

  typedef typename internal::remove_const<
      typename internal::traits<Input>::Scalar>::type CoeffReturnType;
  internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan;

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 2> reduction_dims;
  if (isColMajor) {
    reduction_dims[0] = 1;
    reduction_dims[1] = 2;
  } else {
    reduction_dims[0] = 2;
    reduction_dims[1] = 3;
  }
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  typename internal::conditional<
      internal::traits<Input>::Layout == ColMajor,
      const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
      const Eigen::IndexList<Eigen::type2index<2>,
                             Eigen::type2index<3> > >::type reduction_dims;
#endif
  return input
      .extract_image_patches(patchRows, patchCols, strideRows, strideCols,
                             in_strideRows, in_strideCols, padding_type,
                             -Eigen::NumTraits<CoeffReturnType>::highest())
      .reduce(reduction_dims, mean_with_nan)
      .reshape(post_reduce_dims);
}
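// Note on the in_strideRows/in_strideCols parameters of the spatial poolers
// above (an illustrative aside): values greater than 1 dilate the patch, as
// computed by patchRowsEff/patchColsEff. For example, a 3x3 patch with an
// input stride of 2 covers an effective 3 + (3 - 1) * (2 - 1) = 5 pixel
// window in each dimension.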
/** CuboidAvgPooling
 * \ingroup CXX11_NeuralNetworks_Module
 *
 * \brief Applies an average pooling over a multichannel input volume.
 *
 * The input parameter is expected to be a tensor with a rank of 5 (channels,
 * depth, height, width, others in col-major, and the reverse of that in
 * row-major).
 *
 * The result can be assigned to a tensor of rank equal to the rank of the
 * input. The dimensions of the result will be channels, depth, height, width,
 * and others (in col-major, and the reverse of that if the input was
 * row-major).
 *
 * The order of the depth, width and height dimensions can be swapped if
 * needed.
 *
 */
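// Example usage (an illustrative sketch; the tensor sizes below are arbitrary
// assumptions, not part of the upstream API):
//
//   Eigen::Tensor<float, 5> input(3, 16, 16, 16, 8);  // channels, planes, rows, cols, batch
//   input.setRandom();
//   Eigen::Tensor<float, 5> pooled = Eigen::CuboidAvgPooling(
//       input, 2, 2, 2, 2, 2, 2, Eigen::PADDING_SAME);
//   // With SAME padding each pooled extent is divup(16, 2) = 8, so pooled
//   // has dimensions (3, 8, 8, 8, 8).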
#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<float>, const Eigen::array<int, 1>,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<float>,
        const Eigen::IndexList<Eigen::type2index<1> >,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
                                      const Input> > > >
#endif
CuboidAvgPooling(const Input& input, DenseIndex patchPlanes,
                 DenseIndex patchRows, DenseIndex patchCols,
                 DenseIndex stridePlanes, DenseIndex strideRows,
                 DenseIndex strideCols, const PaddingType padding_type) {
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);
  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);

  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> >
      in(input);

  static const int idxPlanes = isColMajor ? 1 : 3;
  static const int idxRows = 2;
  static const int idxCols = isColMajor ? 3 : 1;

  // Molds the output of the reduction into the shape expected by the user
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output depth
  // - 3rd dim: output height
  // - 4th dim: output width
  // - 5th dim and beyond: everything else including batch size
  Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)) - patchPlanes + 1,
        stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)) - patchRows + 1,
        strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)) - patchCols + 1,
        strideCols);
  } else {
    post_reduce_dims[idxPlanes] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxPlanes)), stridePlanes);
    post_reduce_dims[idxRows] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxRows)), strideRows);
    post_reduce_dims[idxCols] = Eigen::divup(
        static_cast<DenseIndex>(in.dimension(idxCols)), strideCols);
  }
  post_reduce_dims[4] = in.dimension(4);

  Eigen::DSizes<DenseIndex, 3> pre_reduce_dims;
  pre_reduce_dims[1] = patchRows * patchCols * patchPlanes;
  if (isColMajor) {
    pre_reduce_dims[0] = post_reduce_dims[0];
    pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] *
                         post_reduce_dims[3] * post_reduce_dims[4];
  } else {
    pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] *
                         post_reduce_dims[2] * post_reduce_dims[3];
    pre_reduce_dims[2] = post_reduce_dims[4];
  }

  typedef typename internal::remove_const<
      typename internal::traits<Input>::Scalar>::type CoeffReturnType;
  internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan;

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 1> reduction_dims;
  reduction_dims[0] = 1;
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  Eigen::IndexList<Eigen::type2index<1> > reduction_dims;
#endif
  return input
      .extract_volume_patches(patchPlanes, patchRows, patchCols, stridePlanes,
                              strideRows, strideCols, padding_type,
                              -Eigen::NumTraits<float>::highest())
      .reshape(pre_reduce_dims)
      .reduce(reduction_dims, mean_with_nan)
      .reshape(post_reduce_dims);
}

}  // end namespace Eigen

#endif  // TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_