// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_NEURAL_NETWORKS_POOLING_H
#define EIGEN_CXX11_NEURAL_NETWORKS_POOLING_H

#include "Patch3d.h"

namespace Eigen {

/** SpatialMaxPooling
  * \ingroup CXX11_NeuralNetworks_Module
  *
  * \brief Applies a max-pooling over a multichannel input image.
  *
  * The input parameter is expected to be a tensor with a rank of 4 (channels,
  * height, width, others in col-major, and the reverse of that in row-major).
  *
  * The result can be assigned to a tensor of rank equal to the rank of the
  * input. The dimensions of the result will be channels, height, width, and
  * others (in col-major, and the reverse of that if the input was row-major).
  *
  * The order of the width and height dimensions can be swapped if needed.
  *
  */
#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        const Eigen::array<int, 2>,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        typename internal::conditional<
            internal::traits<Input>::Layout == ColMajor,
            const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
            const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3> > >::type,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#endif
SpatialMaxPooling(const Input& input, DenseIndex patchRows,
                  DenseIndex patchCols, DenseIndex strideRows,
                  DenseIndex strideCols, const PaddingType padding_type,
                  DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) {
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);

  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> > in(input);

  const DenseIndex patchRowsEff =
      patchRows + (patchRows - 1) * (in_strideRows - 1);
  const DenseIndex patchColsEff =
      patchCols + (patchCols - 1) * (in_strideCols - 1);

  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
  static const int idxRows = isColMajor ? 1 : 2;
  static const int idxCols = isColMajor ? 2 : 1;

  // Molds the output of the reduction into the shape expected by the user.
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output height
  // - 3rd dim: output width
  // - 4th dim and beyond: everything else including batch size
  Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    post_reduce_dims[idxRows] =
        numext::ceil((in.dimension(idxRows) - patchRowsEff + 1.f) /
                     static_cast<float>(strideRows));
    post_reduce_dims[idxCols] =
        numext::ceil((in.dimension(idxCols) - patchColsEff + 1.f) /
                     static_cast<float>(strideCols));
  } else {
    post_reduce_dims[idxRows] =
        numext::ceil(in.dimension(idxRows) / static_cast<float>(strideRows));
    post_reduce_dims[idxCols] =
        numext::ceil(in.dimension(idxCols) / static_cast<float>(strideCols));
  }
  post_reduce_dims[3] = in.dimension(3);

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 2> reduction_dims;
  if (isColMajor) {
    reduction_dims[0] = 1;
    reduction_dims[1] = 2;
  } else {
    reduction_dims[0] = 2;
    reduction_dims[1] = 3;
  }
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  typename internal::conditional<
      internal::traits<Input>::Layout == ColMajor,
      const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
      const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3> > >::type
      reduction_dims;
#endif

  return input
      .extract_image_patches(
          patchRows, patchCols, strideRows, strideCols, in_strideRows,
          in_strideCols, padding_type,
          -Eigen::NumTraits<typename internal::remove_const<
              typename internal::traits<Input>::Scalar>::type>::highest())
      .maximum(reduction_dims)
      .reshape(post_reduce_dims);
}
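// Example usage (an illustrative sketch, not part of this header; the tensor
// sizes below are arbitrary assumptions, and the unsupported Eigen Tensor
// module is assumed to be included):
//
//   Eigen::Tensor<float, 4> input(64, 112, 112, 16);  // channels, rows, cols, batch
//   input.setRandom();
//   // 3x3 window, stride 2, VALID padding:
//   // output is 64 x ceil((112 - 3 + 1) / 2) x ceil((112 - 3 + 1) / 2) x 16
//   //           = 64 x 55 x 55 x 16 (col-major).
//   Eigen::Tensor<float, 4> pooled =
//       Eigen::SpatialMaxPooling(input, 3, 3, 2, 2, Eigen::PADDING_VALID);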
/** CuboidMaxPooling
  * \ingroup CXX11_NeuralNetworks_Module
  *
  * \brief Applies a max-pooling over a multichannel input volume.
  *
  * The input parameter is expected to be a tensor with a rank of 5 (channels,
  * depth, height, width, others in col-major, and the reverse of that in
  * row-major).
  *
  * The result can be assigned to a tensor of rank equal to the rank of the
  * input. The dimensions of the result will be channels, depth, height, width,
  * and others (in col-major, and the reverse of that if the input was
  * row-major).
  *
  * The order of the depth, width and height dimensions can be swapped if
  * needed.
  *
  */
#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<float>, const Eigen::array<int, 1>,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Input> > > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::MaxReducer<float>,
        const Eigen::IndexList<Eigen::type2index<1> >,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Input> > > >
#endif
CuboidMaxPooling(const Input& input, DenseIndex patchPlanes,
                 DenseIndex patchRows, DenseIndex patchCols,
                 DenseIndex stridePlanes, DenseIndex strideRows,
                 DenseIndex strideCols, const PaddingType padding_type) {
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);
  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);

  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> > in(input);

  static const int idxPlanes = isColMajor ? 1 : 3;
  static const int idxRows = 2;
  static const int idxCols = isColMajor ? 3 : 1;

  // Molds the output of the reduction into the shape expected by the user.
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output depth
  // - 3rd dim: output height
  // - 4th dim: output width
  // - 5th dim and beyond: everything else including batch size
  Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    post_reduce_dims[idxPlanes] =
        numext::ceil((in.dimension(idxPlanes) - patchPlanes + 1.f) /
                     static_cast<float>(stridePlanes));
    post_reduce_dims[idxRows] =
        numext::ceil((in.dimension(idxRows) - patchRows + 1.f) /
                     static_cast<float>(strideRows));
    post_reduce_dims[idxCols] =
        numext::ceil((in.dimension(idxCols) - patchCols + 1.f) /
                     static_cast<float>(strideCols));
  } else {
    post_reduce_dims[idxPlanes] = numext::ceil(
        in.dimension(idxPlanes) / static_cast<float>(stridePlanes));
    post_reduce_dims[idxRows] =
        numext::ceil(in.dimension(idxRows) / static_cast<float>(strideRows));
    post_reduce_dims[idxCols] =
        numext::ceil(in.dimension(idxCols) / static_cast<float>(strideCols));
  }
  post_reduce_dims[4] = in.dimension(4);

  Eigen::DSizes<DenseIndex, 3> pre_reduce_dims;
  pre_reduce_dims[1] = patchRows * patchCols * patchPlanes;
  if (isColMajor) {
    pre_reduce_dims[0] = post_reduce_dims[0];
    pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] *
                         post_reduce_dims[3] * post_reduce_dims[4];
  } else {
    pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] *
                         post_reduce_dims[2] * post_reduce_dims[3];
    pre_reduce_dims[2] = post_reduce_dims[4];
  }

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 1> reduction_dims;
  reduction_dims[0] = 1;
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  Eigen::IndexList<Eigen::type2index<1> > reduction_dims;
#endif
  return input
      .extract_volume_patches(patchPlanes, patchRows, patchCols, stridePlanes,
                              strideRows, strideCols, padding_type,
                              -Eigen::NumTraits<float>::highest())
      .reshape(pre_reduce_dims)
      .maximum(reduction_dims)
      .reshape(post_reduce_dims);
}
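// Example usage (an illustrative sketch; the sizes are assumptions):
//
//   Eigen::Tensor<float, 5> volume(32, 16, 64, 64, 8);  // channels, depth, rows, cols, batch
//   volume.setRandom();
//   // 2x2x2 window, stride 2 in every dimension, SAME padding:
//   // output is 32 x ceil(16/2) x ceil(64/2) x ceil(64/2) x 8
//   //           = 32 x 8 x 32 x 32 x 8 (col-major).
//   Eigen::Tensor<float, 5> pooled =
//       Eigen::CuboidMaxPooling(volume, 2, 2, 2, 2, 2, 2, Eigen::PADDING_SAME);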
/** SpatialAvgPooling
  * \ingroup CXX11_NeuralNetworks_Module
  *
  * \brief Applies an average pooling over a multichannel input image.
  *
  * The input parameter is expected to be a tensor with a rank of 4 (channels,
  * height, width, others in col-major, and the reverse of that in row-major).
  *
  * The result can be assigned to a tensor of rank equal to the rank of the
  * input. The dimensions of the result will be channels, height, width, and
  * others (in col-major, and the reverse of that if the input was row-major).
  *
  * The order of the width and height dimensions can be swapped if needed.
  *
  */
namespace internal {

template <typename T>
struct AvgPoolMeanReducer {
#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)
  // We only support packet access for floats.
  static const bool PacketAccess = internal::is_same<T, float>::value;
#else
  static const bool PacketAccess = false;
#endif
  static const bool IsStateful = true;

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE AvgPoolMeanReducer() : scalarCount_(0) {
    typedef typename packet_traits<T>::type Packet;
    packetCount_ = pset1<Packet>(0.0);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
    if (t != -Eigen::NumTraits<T>::highest()) {
      (*accum) = (*accum) + t;
      scalarCount_++;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
    return static_cast<T>(0);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
    eigen_assert(scalarCount_ > 0);
    return accum / scalarCount_;
  }

#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)
#ifdef EIGEN_VECTORIZE_AVX
#define pequal(a, b) _mm256_cmp_ps(a, b, _CMP_EQ_UQ)
#define psel(a, b, false_mask) _mm256_blendv_ps(a, b, false_mask)
#else
#define pequal(a, b) _mm_cmpeq_ps(a, b)
#define psel(a, b, false_mask) \
  _mm_or_ps(_mm_andnot_ps(false_mask, a), _mm_and_ps(false_mask, b))
#endif

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p,
                                                          Packet* accum) {
    reducePacketWithType(static_cast<T>(0), p, accum);
  }

  template <typename Packet>
  void reducePacketWithType(T, const Packet& p, Packet* accum) {
    Packet skip_mask =
        pequal(p, pset1<Packet>(-Eigen::NumTraits<T>::highest()));
    (*accum) = padd<Packet>(*accum, psel(p, pset1<Packet>(0), skip_mask));
    packetCount_ = padd<Packet>(
        packetCount_, psel(pset1<Packet>(1), pset1<Packet>(0), skip_mask));
  }

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
    return pset1<Packet>(0);
  }

  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
  finalizePacket(const Packet& vaccum) const {
    return pdiv(vaccum, packetCount_);
  }
  template <typename Packet>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T
  finalizeBoth(const T saccum, const Packet& vaccum) const {
    return (saccum + predux(vaccum)) / (scalarCount_ + predux(packetCount_));
  }
#endif

 protected:
  typedef typename packet_traits<T>::type Packet;
  int scalarCount_;
  Packet packetCount_;
};

}  // namespace internal

#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        const Eigen::array<int, 2>,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<typename internal::traits<Input>::Index,
                        internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<typename internal::remove_const<
            typename internal::traits<Input>::Scalar>::type>,
        typename internal::conditional<
            internal::traits<Input>::Layout == ColMajor,
            const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
            const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3> > >::type,
        const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
#endif
SpatialAvgPooling(const Input& input, DenseIndex patchRows,
                  DenseIndex patchCols, DenseIndex strideRows,
                  DenseIndex strideCols, const PaddingType padding_type,
                  DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) {
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);

  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> > in(input);

  const DenseIndex patchRowsEff =
      patchRows + (patchRows - 1) * (in_strideRows - 1);
  const DenseIndex patchColsEff =
      patchCols + (patchCols - 1) * (in_strideCols - 1);

  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
  static const int idxRows = isColMajor ? 1 : 2;
  static const int idxCols = isColMajor ? 2 : 1;

  // Molds the output of the reduction into the shape expected by the user.
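// A minimal sketch of how the reducer handles padding (the values below are
// chosen purely for illustration). Patch extraction fills padded coefficients
// with -Eigen::NumTraits<T>::highest(), which reduce() skips, so border
// patches are averaged over the valid coefficients only:
//
//   internal::AvgPoolMeanReducer<float> reducer;
//   float accum = reducer.initialize();                           // 0
//   reducer.reduce(1.f, &accum);                                  // counted
//   reducer.reduce(3.f, &accum);                                  // counted
//   reducer.reduce(-Eigen::NumTraits<float>::highest(), &accum);  // skipped
//   float mean = reducer.finalize(accum);                         // (1 + 3) / 2 = 2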
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output height
  // - 3rd dim: output width
  // - 4th dim and beyond: everything else including batch size
  Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    post_reduce_dims[idxRows] =
        numext::ceil((in.dimension(idxRows) - patchRowsEff + 1.f) /
                     static_cast<float>(strideRows));
    post_reduce_dims[idxCols] =
        numext::ceil((in.dimension(idxCols) - patchColsEff + 1.f) /
                     static_cast<float>(strideCols));
  } else {
    post_reduce_dims[idxRows] =
        numext::ceil(in.dimension(idxRows) / static_cast<float>(strideRows));
    post_reduce_dims[idxCols] =
        numext::ceil(in.dimension(idxCols) / static_cast<float>(strideCols));
  }
  post_reduce_dims[3] = in.dimension(3);

  typedef typename internal::remove_const<
      typename internal::traits<Input>::Scalar>::type CoeffReturnType;
  internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan;

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 2> reduction_dims;
  if (isColMajor) {
    reduction_dims[0] = 1;
    reduction_dims[1] = 2;
  } else {
    reduction_dims[0] = 2;
    reduction_dims[1] = 3;
  }
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  typename internal::conditional<
      internal::traits<Input>::Layout == ColMajor,
      const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >,
      const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3> > >::type
      reduction_dims;
#endif

  return input
      .extract_image_patches(
          patchRows, patchCols, strideRows, strideCols, in_strideRows,
          in_strideCols, padding_type,
          -Eigen::NumTraits<typename internal::remove_const<
              typename internal::traits<Input>::Scalar>::type>::highest())
      .reduce(reduction_dims, mean_with_nan)
      .reshape(post_reduce_dims);
}

/** CuboidAvgPooling
  * \ingroup CXX11_NeuralNetworks_Module
  *
  * \brief Applies an average pooling over a multichannel input volume.
  *
  * The input parameter is expected to be a tensor with a rank of 5 (channels,
  * depth, height, width, others in col-major, and the reverse of that in
  * row-major).
  *
  * The result can be assigned to a tensor of rank equal to the rank of the
  * input. The dimensions of the result will be channels, depth, height, width,
  * and others (in col-major, and the reverse of that if the input was
  * row-major).
  *
  * The order of the depth, width and height dimensions can be swapped if
  * needed.
  *
  */
#if !defined(EIGEN_HAS_INDEX_LIST)
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<float>, const Eigen::array<int, 1>,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Input> > > >
#else
template <typename Input>
EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
    const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
    const TensorReductionOp<
        internal::AvgPoolMeanReducer<float>,
        const Eigen::IndexList<Eigen::type2index<1> >,
        const TensorReshapingOp<
            const Eigen::DSizes<DenseIndex, 3>,
            const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Input> > > >
#endif
CuboidAvgPooling(const Input& input, DenseIndex patchPlanes,
                 DenseIndex patchRows, DenseIndex patchCols,
                 DenseIndex stridePlanes, DenseIndex strideRows,
                 DenseIndex strideCols, const PaddingType padding_type) {
  EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5,
                      YOU_MADE_A_PROGRAMMING_MISTAKE);
  static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);

  typedef typename internal::traits<Input>::Index TensorIndex;
  TensorRef<Tensor<typename internal::traits<Input>::Scalar,
                   internal::traits<Input>::NumDimensions,
                   internal::traits<Input>::Layout, TensorIndex> > in(input);

  static const int idxPlanes = isColMajor ? 1 : 3;
  static const int idxRows = 2;
  static const int idxCols = isColMajor ? 3 : 1;
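// Example usage (an illustrative sketch; the sizes are assumptions):
//
//   Eigen::Tensor<float, 4> input(3, 28, 28, 4);  // channels, rows, cols, batch
//   input.setRandom();
//   // 2x2 window, stride 2, SAME padding: output is 3 x 14 x 14 x 4. Border
//   // windows average only the valid (non-padded) coefficients, thanks to
//   // AvgPoolMeanReducer skipping the padding sentinel.
//   Eigen::Tensor<float, 4> pooled =
//       Eigen::SpatialAvgPooling(input, 2, 2, 2, 2, Eigen::PADDING_SAME);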
  // Molds the output of the reduction into the shape expected by the user.
  // (assuming col-major):
  // - 1st dim: channels
  // - 2nd dim: output depth
  // - 3rd dim: output height
  // - 4th dim: output width
  // - 5th dim and beyond: everything else including batch size
  Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions>
      post_reduce_dims;
  post_reduce_dims[0] = in.dimension(0);
  if (padding_type == PADDING_VALID) {
    post_reduce_dims[idxPlanes] =
        numext::ceil((in.dimension(idxPlanes) - patchPlanes + 1.f) /
                     static_cast<float>(stridePlanes));
    post_reduce_dims[idxRows] =
        numext::ceil((in.dimension(idxRows) - patchRows + 1.f) /
                     static_cast<float>(strideRows));
    post_reduce_dims[idxCols] =
        numext::ceil((in.dimension(idxCols) - patchCols + 1.f) /
                     static_cast<float>(strideCols));
  } else {
    post_reduce_dims[idxPlanes] = numext::ceil(
        in.dimension(idxPlanes) / static_cast<float>(stridePlanes));
    post_reduce_dims[idxRows] =
        numext::ceil(in.dimension(idxRows) / static_cast<float>(strideRows));
    post_reduce_dims[idxCols] =
        numext::ceil(in.dimension(idxCols) / static_cast<float>(strideCols));
  }
  post_reduce_dims[4] = in.dimension(4);

  Eigen::DSizes<DenseIndex, 3> pre_reduce_dims;
  pre_reduce_dims[1] = patchRows * patchCols * patchPlanes;
  if (isColMajor) {
    pre_reduce_dims[0] = post_reduce_dims[0];
    pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] *
                         post_reduce_dims[3] * post_reduce_dims[4];
  } else {
    pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] *
                         post_reduce_dims[2] * post_reduce_dims[3];
    pre_reduce_dims[2] = post_reduce_dims[4];
  }

  typedef typename internal::remove_const<
      typename internal::traits<Input>::Scalar>::type CoeffReturnType;
  internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan;

#if !defined(EIGEN_HAS_INDEX_LIST)
  // nvcc doesn't support cxx11
  Eigen::array<int, 1> reduction_dims;
  reduction_dims[0] = 1;
#else
  // Take advantage of cxx11 to give the compiler information it can use to
  // optimize the code.
  Eigen::IndexList<Eigen::type2index<1> > reduction_dims;
#endif
  return input
      .extract_volume_patches(patchPlanes, patchRows, patchCols, stridePlanes,
                              strideRows, strideCols, padding_type,
                              -Eigen::NumTraits<float>::highest())
      .reshape(pre_reduce_dims)
      .reduce(reduction_dims, mean_with_nan)
      .reshape(post_reduce_dims);
}

}  // end namespace Eigen

#endif  // EIGEN_CXX11_NEURAL_NETWORKS_POOLING_H
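// Example usage (an illustrative sketch; the sizes are assumptions):
//
//   Eigen::Tensor<float, 5> volume(16, 8, 32, 32, 2);  // channels, depth, rows, cols, batch
//   volume.setRandom();
//   // 2x2x2 window, stride 1 along depth and 2 spatially, VALID padding:
//   // output is 16 x ceil((8-2+1)/1) x ceil((32-2+1)/2) x ceil((32-2+1)/2) x 2
//   //           = 16 x 7 x 16 x 16 x 2 (col-major).
//   Eigen::Tensor<float, 5> pooled =
//       Eigen::CuboidAvgPooling(volume, 2, 2, 2, 1, 2, 2, Eigen::PADDING_VALID);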