From 8afce86e6457cf4569d4c420dfc235c819b3475e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 25 Feb 2015 09:48:54 -0800 Subject: Added support for RowMajor layout to the image patch extraction code Speeded up the unsupported_cxx11_tensor_image_patch test and reduced its memory footprint --- .../Eigen/CXX11/src/Tensor/TensorImagePatch.h | 122 +++++++++++++++------ 1 file changed, 89 insertions(+), 33 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h index bf0e7edfb..59b70ad5c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorImagePatch.h @@ -106,7 +106,8 @@ struct TensorEvaluator, Device> { typedef TensorImagePatchOp XprType; typedef typename XprType::Index Index; - static const int NumDims = internal::array_size::Dimensions>::value + 1; + static const int NumInputDims = internal::array_size::Dimensions>::value; + static const int NumDims = NumInputDims + 1; typedef DSizes Dimensions; typedef typename XprType::Scalar Scalar; @@ -120,16 +121,18 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_impl(op.expression(), device) { - // Only column major tensors are supported for now. - EIGEN_STATIC_ASSERT((static_cast(Layout) == static_cast(ColMajor)), YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT(NumDims >= 4, YOU_MADE_A_PROGRAMMING_MISTAKE); const typename TensorEvaluator::Dimensions& input_dims = m_impl.dimensions(); // Caches a few variables. - m_inputRows = input_dims[1]; - m_inputCols = input_dims[2]; + if (static_cast(Layout) == static_cast(ColMajor)) { + m_inputRows = input_dims[1]; + m_inputCols = input_dims[2]; + } else { + m_inputRows = input_dims[NumInputDims-2]; + m_inputCols = input_dims[NumInputDims-3]; + } m_row_strides = op.row_strides(); m_col_strides = op.col_strides(); @@ -157,28 +160,57 @@ struct TensorEvaluator, Device> } // Dimensions for result of extraction. - // 0: depth - // 1: patch_rows - // 2: patch_cols - // 3: number of patches - // 4 and beyond: anything else (such as batch). - m_dimensions[0] = input_dims[0]; - m_dimensions[1] = op.patch_rows(); - m_dimensions[2] = op.patch_cols(); - m_dimensions[3] = m_outputRows * m_outputCols; - for (int i = 4; i < NumDims; ++i) { - m_dimensions[i] = input_dims[i-1]; + if (static_cast(Layout) == static_cast(ColMajor)) { + // ColMajor + // 0: depth + // 1: patch_rows + // 2: patch_cols + // 3: number of patches + // 4 and beyond: anything else (such as batch). + m_dimensions[0] = input_dims[0]; + m_dimensions[1] = op.patch_rows(); + m_dimensions[2] = op.patch_cols(); + m_dimensions[3] = m_outputRows * m_outputCols; + for (int i = 4; i < NumDims; ++i) { + m_dimensions[i] = input_dims[i-1]; + } + } else { + // RowMajor + // NumDims-1: depth + // NumDims-2: patch_rows + // NumDims-3: patch_cols + // NumDims-4: number of patches + // NumDims-5 and beyond: anything else (such as batch). + m_dimensions[NumDims-1] = input_dims[NumInputDims-1]; + m_dimensions[NumDims-2] = op.patch_rows(); + m_dimensions[NumDims-3] = op.patch_cols(); + m_dimensions[NumDims-4] = m_outputRows * m_outputCols; + for (int i = NumDims-5; i >= 0; --i) { + m_dimensions[i] = input_dims[i]; + } } // Strides for moving the patch in various dimensions. - m_colStride = m_dimensions[1]; - m_patchStride = m_colStride * m_dimensions[2] * m_dimensions[0]; - m_otherStride = m_patchStride * m_dimensions[3]; + if (static_cast(Layout) == static_cast(ColMajor)) { + m_colStride = m_dimensions[1]; + m_patchStride = m_colStride * m_dimensions[2] * m_dimensions[0]; + m_otherStride = m_patchStride * m_dimensions[3]; + } else { + m_colStride = m_dimensions[NumDims-2]; + m_patchStride = m_colStride * m_dimensions[NumDims-3] * m_dimensions[NumDims-1]; + m_otherStride = m_patchStride * m_dimensions[NumDims-4]; + } // Strides for navigating through the input tensor. - m_rowInputStride = input_dims[0]; - m_colInputStride = input_dims[0] * input_dims[1]; - m_patchInputStride = input_dims[0] * input_dims[1] * input_dims[2]; + if (static_cast(Layout) == static_cast(ColMajor)) { + m_rowInputStride = input_dims[0]; + m_colInputStride = input_dims[0] * input_dims[1]; + m_patchInputStride = input_dims[0] * input_dims[1] * input_dims[2]; + } else { + m_rowInputStride = input_dims[NumInputDims-1]; + m_colInputStride = input_dims[NumInputDims-1] * input_dims[NumInputDims-2]; + m_patchInputStride = input_dims[NumInputDims-1] * input_dims[NumInputDims-2] * input_dims[NumInputDims-3]; + } // Fast representations of different variables. m_fastOtherStride = internal::TensorIntDivisor(m_otherStride); @@ -186,7 +218,11 @@ struct TensorEvaluator, Device> m_fastColStride = internal::TensorIntDivisor(m_colStride); // Number of patches in the width dimension. m_fastOutputRows = internal::TensorIntDivisor(m_outputRows); - m_fastDimZero = internal::TensorIntDivisor(m_dimensions[0]); + if (static_cast(Layout) == static_cast(ColMajor)) { + m_fastDimZero = internal::TensorIntDivisor(m_dimensions[0]); + } else { + m_fastDimZero = internal::TensorIntDivisor(m_dimensions[NumDims-1]); + } } typedef typename XprType::CoeffReturnType CoeffReturnType; @@ -207,7 +243,6 @@ struct TensorEvaluator, Device> { // Patch index corresponding to the passed in index. const Index patchIndex = index / m_fastPatchStride; - // Find the offset of the element wrt the location of the first element. const Index patchOffset = (index - patchIndex * m_patchStride) / m_fastDimZero; @@ -232,7 +267,8 @@ struct TensorEvaluator, Device> return Scalar(0); } - const Index depth = index - (index / m_fastDimZero) * m_dimensions[0]; + const int depth_index = static_cast(Layout) == static_cast(ColMajor) ? 0 : NumDims - 1; + const Index depth = index - (index / m_fastDimZero) * m_dimensions[depth_index]; const Index inputIndex = depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex * m_patchInputStride; return m_impl.coeff(inputIndex); @@ -286,7 +322,8 @@ struct TensorEvaluator, Device> if (inputRows[0] >= 0 && inputRows[1] < m_inputRows) { // no padding - const Index depth = index - (index / m_fastDimZero) * m_dimensions[0]; + const int depth_index = static_cast(Layout) == static_cast(ColMajor) ? 0 : NumDims - 1; + const Index depth = index - (index / m_fastDimZero) * m_dimensions[depth_index]; const Index inputIndex = depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex * m_patchInputStride; return m_impl.template packet(inputIndex); } @@ -309,14 +346,24 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(const array& coords) const { // Location of the first element of the patch. + // ColMajor // 0: d, 1: patch_rows, 2: patch_cols, 3: number of patches, 4: number of batches - const Index patchIndex = coords[3]; + // RowMajor + // 0: number of batches, 1: number of patches, 2: patch_cols , 3: patch_rows, 4: d + const Index patchIndex = coords[static_cast(Layout) == static_cast(ColMajor) ? 3 : 1]; array inputCoords; - inputCoords[0] = coords[0]; // depth - inputCoords[1] = patchIndex / m_inputCols + coords[1] - m_rowPaddingTop; - inputCoords[2] = patchIndex - patchIndex / m_inputCols * m_inputCols + coords[2] - m_colPaddingLeft; - inputCoords[3] = coords[4]; // batch + if (static_cast(Layout) == static_cast(ColMajor)) { + inputCoords[0] = coords[0]; // depth + inputCoords[1] = patchIndex / m_inputCols + coords[1] - m_rowPaddingTop; + inputCoords[2] = patchIndex - patchIndex / m_inputCols * m_inputCols + coords[2] - m_colPaddingLeft; + inputCoords[3] = coords[4]; // batch + } else { + inputCoords[3] = coords[4]; // depth + inputCoords[2] = patchIndex / m_inputCols + coords[3] - m_rowPaddingTop; + inputCoords[1] = patchIndex - patchIndex / m_inputCols * m_inputCols + coords[2] - m_colPaddingLeft; + inputCoords[0] = coords[0]; // batch + } // If the computed coordinates are outside the original image perimeter, return 0. if (inputCoords[1] < 0 || inputCoords[1] >= m_inputRows || inputCoords[2] < 0 || inputCoords[2] >= m_inputCols) { @@ -325,11 +372,20 @@ struct TensorEvaluator, Device> if (TensorEvaluator::CoordAccess) { return m_impl.coeff(inputCoords); } else { - Index inputIndex = + Index inputIndex; + if (static_cast(Layout) == static_cast(ColMajor)) { + inputIndex = inputCoords[3] * m_patchInputStride + inputCoords[2] * m_colInputStride + inputCoords[1] * m_rowInputStride + inputCoords[0]; + } else { + inputIndex = + inputCoords[1] * m_patchInputStride + + inputCoords[2] * m_colInputStride + + inputCoords[3] * m_rowInputStride + + inputCoords[4]; + } return m_impl.coeff(inputIndex); } } -- cgit v1.2.3