diff options
author | Vijay Vasudevan <vrv@google.com> | 2016-03-17 17:46:03 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2016-03-18 08:47:32 -0700 |
commit | 50edc4e878c076c44fecc847af110a19b171eb63 (patch) | |
tree | 8ea27610ddefee566c2a3f750c2e034085af5b83 /tensorflow | |
parent | 1a39c2c1979706084338352d1264951b3ec9c6bc (diff) |
TensorFlow: move some Eigen NN code from our third_party/eigen3 copy
to being part of TF, and add tests.
Change: 117509710
Diffstat (limited to 'tensorflow')
26 files changed, 7283 insertions, 11 deletions
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index d4d9f2f22f..1d51656a48 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -54,6 +54,7 @@ cc_library( name = "conv_2d", hdrs = ["conv_2d.h"], deps = [ + ":eigen_helpers", "//tensorflow/core:framework", "//third_party/eigen3", ], @@ -214,6 +215,24 @@ cc_header_only_library( deps = [":bounds_check"], ) +cc_library( + name = "eigen_helpers", + hdrs = [ + "eigen_activations.h", + "eigen_attention.h", + "eigen_backward_cuboid_convolutions.h", + "eigen_backward_spatial_convolutions.h", + "eigen_cuboid_convolution.h", + "eigen_patch_3d.h", + "eigen_pooling.h", + "eigen_softmax.h", + "eigen_spatial_convolutions.h", + ], + deps = [ + "//third_party/eigen3", + ], +) + # OpKernel libraries ---------------------------------------------------------- tf_kernel_libraries( @@ -529,12 +548,12 @@ tf_kernel_libraries( name = "image", prefixes = [ "adjust_contrast_op", - "attention_ops", "colorspace_op", "decode_jpeg_op", "decode_png_op", "draw_bounding_box_op", "encode_jpeg_op", + "attention_ops", "encode_png_op", "random_crop_op", "resize_area_op", @@ -544,6 +563,7 @@ tf_kernel_libraries( "sample_distorted_bounding_box_op", ], deps = [ + ":eigen_helpers", "//tensorflow/core:framework", "//tensorflow/core:image_ops_op_lib", "//tensorflow/core:lib", @@ -556,6 +576,27 @@ tf_kernel_libraries( tf_cc_tests( linkstatic = tf_kernel_tests_linkstatic(), # Required for benchmarking tests = [ + "eigen_activations_test", + "eigen_attention_test", + "eigen_backward_spatial_convolutions_test", + "eigen_pooling_test", + "eigen_softmax_test", + "eigen_spatial_convolutions_test", + ], + deps = [ + ":eigen_helpers", + "//tensorflow/core:core_cpu", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) + +tf_cc_tests( + linkstatic = tf_kernel_tests_linkstatic(), # Required for 
benchmarking + tests = [ "adjust_contrast_op_benchmark_test", "adjust_contrast_op_test", "colorspace_op_test", @@ -820,6 +861,7 @@ tf_kernel_library( ], deps = [ ":conv_2d", + ":eigen_helpers", ":ops_util", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -1029,6 +1071,15 @@ filegroup( srcs = [ "avgpooling_op.h", "bounds_check.h", + "eigen_activations.h", + "eigen_attention.h", + "eigen_backward_cuboid_convolutions.h", + "eigen_backward_spatial_convolutions.h", + "eigen_cuboid_convolution.h", + "eigen_patch_3d.h", + "eigen_pooling.h", + "eigen_softmax.h", + "eigen_spatial_convolutions.h", "maxpooling_op.h", "ops_util.cc", "ops_util.h", diff --git a/tensorflow/core/kernels/attention_ops.cc b/tensorflow/core/kernels/attention_ops.cc index 59e147bf93..36c1b26476 100644 --- a/tensorflow/core/kernels/attention_ops.cc +++ b/tensorflow/core/kernels/attention_ops.cc @@ -18,12 +18,12 @@ limitations under the License. #define EIGEN_USE_THREADS #include <vector> -#include "third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/eigen_attention.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/kernels/avgpooling_op.cc b/tensorflow/core/kernels/avgpooling_op.cc index 37c502ad69..a3c03601c8 100644 --- a/tensorflow/core/kernels/avgpooling_op.cc +++ b/tensorflow/core/kernels/avgpooling_op.cc @@ -20,13 +20,13 @@ limitations under the License. 
#include "tensorflow/core/kernels/avgpooling_op.h" #include <vector> -#include "third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_slice.h" +#include "tensorflow/core/kernels/eigen_pooling.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/kernels/pooling_ops_common.h" #include "tensorflow/core/lib/core/errors.h" diff --git a/tensorflow/core/kernels/avgpooling_op.h b/tensorflow/core/kernels/avgpooling_op.h index 0b577971f3..2804cdbee5 100644 --- a/tensorflow/core/kernels/avgpooling_op.h +++ b/tensorflow/core/kernels/avgpooling_op.h @@ -17,8 +17,8 @@ limitations under the License. #define TENSORFLOW_KERNELS_AVGPOOLING_OP_H_ // Functor definition for AvgPoolingOp, must be compilable by nvcc. -#include "third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks" #include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/kernels/eigen_pooling.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/conv_2d.h b/tensorflow/core/kernels/conv_2d.h index 141343ec3b..9d06853053 100644 --- a/tensorflow/core/kernels/conv_2d.h +++ b/tensorflow/core/kernels/conv_2d.h @@ -16,9 +16,10 @@ limitations under the License. 
#ifndef TENSORFLOW_KERNELS_CONV_2D_H_ #define TENSORFLOW_KERNELS_CONV_2D_H_ -#include "third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/kernels/eigen_backward_spatial_convolutions.h" +#include "tensorflow/core/kernels/eigen_spatial_convolutions.h" #include "tensorflow/core/util/tensor_format.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/eigen_activations.h b/tensorflow/core/kernels/eigen_activations.h new file mode 100644 index 0000000000..252e434811 --- /dev/null +++ b/tensorflow/core/kernels/eigen_activations.h @@ -0,0 +1,125 @@ +/* Copyright 2015 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_ACTIVATIONS_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_ACTIVATIONS_H_ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace Eigen { + +/** scalar_sigmoid_fast_derivative_op + * \ingroup CXX11_NeuralNetworks_Module + * \brief Template functor to compute the fast derivative of a sigmoid + * + * Input should be the backpropagated gradient. 
+ * + * \sa class CwiseUnaryOp, Cwise::sigmoid_fast_derivative() + */ +template <typename T> +struct scalar_sigmoid_fast_derivative_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_fast_derivative_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& y) const { + const T one = T(1); + return (one - y) * y; + } + + template <typename Packet> + inline Packet packetOp(const Packet& y) const { + const Packet one = internal::pset1<Packet>(1); + return internal::pmul(internal::psub(one, y), y); + } +}; + +namespace internal { +template <typename T> +struct functor_traits<scalar_sigmoid_fast_derivative_op<T> > { + enum { + Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost, + PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasMul && + packet_traits<T>::HasNegate + }; +}; +} // namespace internal + +/** scalar_tanh_fast_derivative_op + * \ingroup CXX11_NeuralNetworks_Module + * \brief Template functor to compute the fast derivative of a tanh + * + * Input should be the backpropagated gradient. + * + * \sa class CwiseUnaryOp, Cwise::tanh_fast_derivative() + */ +template <typename T> +struct scalar_tanh_fast_derivative_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_fast_derivative_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& y) const { + const T one = T(1); + return one - (y * y); + } + + template <typename Packet> + inline Packet packetOp(const Packet& y) const { + const Packet one = internal::pset1<Packet>(1); + return internal::psub(one, internal::pmul(y, y)); + } +}; + +namespace internal { +template <typename T> +struct functor_traits<scalar_tanh_fast_derivative_op<T> > { + enum { + Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 1, + PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasMul && + packet_traits<T>::HasNegate + }; +}; +} // namespace internal + +/** + * \ingroup CXX11_NeuralNetworks_Module + * \brief Template functor to clip the magnitude of the first scalar. 
+ * + * \sa class CwiseBinaryOp, MatrixBase::Clip + */ +template <typename Scalar> +struct scalar_clip_op { + EIGEN_EMPTY_STRUCT_CTOR(scalar_clip_op) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar + operator()(const Scalar& a, const Scalar& b) const { + return numext::mini(numext::maxi(a, -b), b); + } + template <typename Packet> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet + packetOp(const Packet& a, const Packet& b) const { + return internal::pmin(internal::pmax(a, internal::pnegate(b)), b); + } +}; + +namespace internal { +template <typename Scalar> +struct functor_traits<scalar_clip_op<Scalar> > { + enum { + Cost = NumTraits<Scalar>::AddCost * 3, + PacketAccess = packet_traits<Scalar>::HasMax && + packet_traits<Scalar>::HasMin && + packet_traits<Scalar>::HasNegate + }; +}; +} // namespace internal + +} // end namespace Eigen + +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_ACTIVATIONS_H_ diff --git a/tensorflow/core/kernels/eigen_activations_test.cc b/tensorflow/core/kernels/eigen_activations_test.cc new file mode 100644 index 0000000000..390f6e8840 --- /dev/null +++ b/tensorflow/core/kernels/eigen_activations_test.cc @@ -0,0 +1,101 @@ +/* Copyright 2015 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/kernels/eigen_activations.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/platform/test.h" + +namespace Eigen { + +namespace { +void EigenApprox(float a, float b) { + ASSERT_TRUE(std::abs(a - b) <= std::min(std::abs(a), std::abs(b)) * 1e-3); +} +} + +TEST(EigenBackwardSpatialConvolutionsTest, SigmoidFastDerivative) { + const ptrdiff_t depth = 3; + const ptrdiff_t batch = 10; + const ptrdiff_t rows = 32; + const ptrdiff_t cols = 48; + + Tensor<float, 4> input(depth, rows, cols, batch); + input.setRandom(); + + Tensor<float, 4> result(depth, rows, cols, batch); + result = input.unaryExpr(scalar_sigmoid_fast_derivative_op<float>()); + + for (int b = 0; b < batch; ++b) { + for (int c = 0; c < cols; ++c) { + for (int r = 0; r < rows; ++r) { + for (int d = 0; d < depth; ++d) { + float val = input(d, r, c, b); + EigenApprox(result(d, r, c, b), (1 - val) * val); + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, TanhFastDerivative) { + const ptrdiff_t depth = 3; + const ptrdiff_t batch = 10; + const ptrdiff_t rows = 32; + const ptrdiff_t cols = 48; + + Tensor<float, 4> input(depth, rows, cols, batch); + input.setRandom(); + + Tensor<float, 4> result(depth, rows, cols, batch); + result = input.unaryExpr(scalar_tanh_fast_derivative_op<float>()); + + for (int b = 0; b < batch; ++b) { + for (int c = 0; c < cols; ++c) { + for (int r = 0; r < rows; ++r) { + for (int d = 0; d < depth; ++d) { + float val = input(d, r, c, b); + EigenApprox(result(d, r, c, b), 1 - (val * val)); + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, Clip) { + const ptrdiff_t depth = 3; + const ptrdiff_t batch = 10; + const ptrdiff_t rows = 32; + const ptrdiff_t cols = 48; + + Tensor<float, 4> input(depth, rows, cols, batch); + input.setRandom(); + + Tensor<float, 4> result(depth, rows, cols, batch); + result = 
input.binaryExpr(input.constant(0.01), scalar_clip_op<float>()); + + for (int b = 0; b < batch; ++b) { + for (int c = 0; c < cols; ++c) { + for (int r = 0; r < rows; ++r) { + for (int d = 0; d < depth; ++d) { + float val = input(d, r, c, b); + EigenApprox(result(d, r, c, b), + (std::min)((std::max)(val, -0.01f), 0.01f)); + } + } + } + } +} + +} // namespace Eigen diff --git a/tensorflow/core/kernels/eigen_attention.h b/tensorflow/core/kernels/eigen_attention.h new file mode 100644 index 0000000000..e7bdda1693 --- /dev/null +++ b/tensorflow/core/kernels/eigen_attention.h @@ -0,0 +1,244 @@ +/* Copyright 2015 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_ATTENTION_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_ATTENTION_H_ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace Eigen { + +/** ExtractGlimpses + * \ingroup CXX11_NeuralNetworks_Module + * + * \brief Extract glimpses from an input tensor. + * + * The input parameter is expected to be a col-major tensor with a rank of 4 (depth, x, y, and batch). + * The width and height parameters specify the extension of the returned glimpses. + * The offsets parameter specifies the x, y locations of the center of the glimpses relative to the center of the input image. 
The vector is expected to contain one IndexPair for each image in the batch dimension. + * The normalized boolean indicates if incoming coordinates are normalized so that 0.0 and 1.0 correspond to the minimum and maximum of each height and width dimension. + * The centered boolean indicates if incoming coordinates are centered relative to the image, in which case -1.0 and 1.0 correspond to minimum and maximum of each dimension while 0.0 corresponds to the center. + * + * The result can be assigned to a tensor of rank equal to that of the input. The result will be laid out in col-major order (depth, x, y, batch). + * The dimensions of the result will be equal to the dimensions of the input except for width and height which will be equal to the requested glimpse size. + */ +namespace { +template <typename Index> +struct GlimpseExtractionOp { + GlimpseExtractionOp(const Index width, const Index height, + const std::vector<IndexPair<float> >& offsets, + const bool normalized, + const bool centered, + const bool uniform_noise) : + width_(width), height_(height), offsets_(offsets), + normalized_(normalized), centered_(centered), uniform_noise_(uniform_noise) { } + + template <typename Input> + DSizes<Index, 4> dimensions(const Input& input) const { + typedef typename internal::traits<Input>::Index IndexType; + typedef TensorRef<Tensor<typename internal::traits<Input>::Scalar, 4, + internal::traits<Input>::Layout, IndexType> > Ref; + Ref in(input); + + DSizes<Index, 4> dims = in.dimensions(); + + dims[0] = in.dimension(0); + dims[1] = width_; + dims[2] = height_; + dims[3] = in.dimension(3); + return dims; + } + + template <typename Input, typename Output, typename Device> + EIGEN_DEVICE_FUNC + void eval(const Input& input, Output& output, const Device& device) const + { + typedef typename internal::traits<Input>::Index IndexType; + typedef TensorRef<Tensor<typename internal::traits<Input>::Scalar, 4, + internal::traits<Input>::Layout, IndexType> > Ref; + Ref in(input); + 
const Index num_channels = in.dimension(0); + const Index input_width = in.dimension(1); + const Index input_height = in.dimension(2); + const Index batch_size = in.dimension(3); + eigen_assert(input_width > 0); + eigen_assert(input_height > 0); + internal::NormalRandomGenerator<float> gen; + internal::UniformRandomGenerator<float> unigen; + + for (Index i = 0; i < batch_size; ++i) { + float x = offsets_[i].first, y = offsets_[i].second; + + // Un-normalize coordinates back to pixel space if normalized. + if (normalized_) { + x *= input_width; + y *= input_height; + } + // Un-center if coordinates are centered on the image center. + if (centered_) { + x /= 2.0f; + y /= 2.0f; + x += input_width / 2.0f; + y += input_height / 2.0f; + } + // Remove half of the glimpse window. + x -= width_ / 2.0f; + y -= height_ / 2.0f; + + const Index offset_x = (Index) x; + const Index offset_y = (Index) y; + Index glimpse_width = width_; + Index glimpse_height = height_; + bool partial_overlap = false; + DSizes<Index, 3> slice_offset(0, offset_x, offset_y); + DSizes<Index, 3> slice_extent(num_channels, width_, height_); + DSizes<Index, 3> base_offset(0, 0, 0); + + if (offset_x < 0) { + slice_offset[1] = 0; + glimpse_width = (std::max<Index>)(0, width_ + offset_x); + slice_extent[1] = glimpse_width; + base_offset[1] = width_ - glimpse_width; + partial_overlap = true; + } else if (offset_x + width_ >= input_width) { + glimpse_width = (std::max<Index>)(0, input_width - offset_x); + slice_extent[1] = glimpse_width; + partial_overlap = true; + } + if (offset_y < 0) { + slice_offset[2] = 0; + glimpse_height = (std::max<Index>)(0, height_ + offset_y); + slice_extent[2] = glimpse_height; + base_offset[2] = height_ - glimpse_height; + partial_overlap = true; + } else if (offset_y + height_ >= input_height) { + glimpse_height = (std::max<Index>)(0, input_height - offset_y); + slice_extent[2] = glimpse_height; + partial_overlap = true; + } + slice_extent[1] = std::min<Index>(input_width, 
slice_extent[1]); + slice_extent[2] = std::min<Index>(input_height, slice_extent[2]); + + + if (partial_overlap) { + + if (uniform_noise_) { + // Initialize the glimpse with uniform noise. + typedef typename internal::remove_const< + typename internal::traits<Input>::Scalar>::type Scalar; + TensorFixedSize<Scalar, Sizes<> > mini; + mini.device(device) = input.template chip<3>(i).minimum(); + TensorFixedSize<float, Sizes<> > range; + range.device(device) = (input.template chip<3>(i).maximum() - mini) + .template cast<float>(); + + DSizes<Index, 3> glimpse_size(num_channels, width_, height_); + TensorMap<Tensor<float, 3> > tmp(NULL, glimpse_size); + output.template chip<3>(i).device(device) = + mini.reshape(Sizes<1, 1, 1>()).broadcast(glimpse_size) + + (tmp.random(unigen) * + range.reshape(Sizes<1, 1, 1>()).broadcast(glimpse_size)) + .template cast<Scalar>(); + } else { + // Initialize the glimpse with white noise: compute the mean and sigma + // of each channel, and use them to shape the gaussian. 
+ DSizes<Index, 2> glimpse_size(width_, height_); + DSizes<Index, 2> input_size(input_width, input_height); + typedef typename internal::remove_const< + typename internal::traits<Input>::Scalar>::type Scalar; + + for (int j = 0; j < num_channels; ++j) { + TensorFixedSize<Scalar, Sizes<> > mean; + mean.device(device) = input.template chip<3>(i) + .template chip<0>(j) + .template cast<float>() + .mean(); + TensorFixedSize<float, Sizes<> > sigma; + sigma.device(device) = + (input.template chip<3>(i) + .template chip<0>(j) + .template cast<float>() - + mean.reshape(Sizes<1, 1>()).broadcast(input_size)) + .square() + .mean() + .sqrt(); + TensorFixedSize<Scalar, Sizes<> > mini; + mini.device(device) = + input.template chip<3>(i).template chip<0>(j).minimum(); + TensorFixedSize<float, Sizes<> > maxi; + maxi.device(device) = + input.template chip<3>(i).template chip<0>(j).maximum(); + + TensorMap<Tensor<float, 2> > tmp(NULL, glimpse_size); + output.template chip<3>(i).template chip<0>(j).device(device) = + (mean.reshape(Sizes<1, 1>()).broadcast(glimpse_size) + + (tmp.random(gen) * + sigma.reshape(Sizes<1, 1>()).broadcast(glimpse_size)) + .template cast<Scalar>()) + .cwiseMin( + maxi.reshape(Sizes<1, 1>()).broadcast(glimpse_size)) + .cwiseMax( + mini.reshape(Sizes<1, 1>()).broadcast(glimpse_size)); + } + } + + // Copy the part of the glimpse that cover the input image if any. 
+ if (glimpse_width == 0 || glimpse_height == 0) { + continue; + } + output.template chip<3>(i) + .slice(base_offset, slice_extent) + .device(device) = + input.template chip<3>(i).slice(slice_offset, slice_extent); + } else { + output.template chip<3>(i).device(device) = + input.template chip<3>(i).slice(slice_offset, slice_extent); + } + } + } + + private: + const Index width_; + const Index height_; + const std::vector<IndexPair<float> > offsets_; + const bool normalized_; + const bool centered_; + const bool uniform_noise_; +}; +} + + +template <typename Input> +EIGEN_ALWAYS_INLINE +static const TensorCustomUnaryOp<const GlimpseExtractionOp<typename internal::traits<Input>::Index>, const Input> +ExtractGlimpses(const Input& input, + const typename internal::traits<Input>::Index width, + const typename internal::traits<Input>::Index height, + const std::vector<IndexPair<float> >& offsets, + const bool normalized = true, const bool centered = true, + const bool uniform_noise = true) +{ + EIGEN_STATIC_ASSERT(internal::traits<Input>::Layout == ColMajor, YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4, YOU_MADE_A_PROGRAMMING_MISTAKE); + + typedef typename internal::traits<Input>::Index Index; + const GlimpseExtractionOp<Index> op(width, height, offsets, normalized, + centered, uniform_noise); + return input.customOp(op); +} + +} // end namespace Eigen + +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_ATTENTION_H_ diff --git a/tensorflow/core/kernels/eigen_attention_test.cc b/tensorflow/core/kernels/eigen_attention_test.cc new file mode 100644 index 0000000000..7d5e0b71b5 --- /dev/null +++ b/tensorflow/core/kernels/eigen_attention_test.cc @@ -0,0 +1,107 @@ +/* Copyright 2015 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/kernels/eigen_attention.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/platform/test.h" + +namespace Eigen { + +namespace { +void EigenApprox(float a, float b) { + ASSERT_TRUE(std::abs(a - b) <= std::min(std::abs(a), std::abs(b)) * 1e-3); +} +} + +TEST(EigenAttentionTest, Simple) { + const ptrdiff_t depth = 3; + const ptrdiff_t batch = 10; + const ptrdiff_t rows = 32; + const ptrdiff_t cols = 48; + const ptrdiff_t glimpse_rows = 8; + const ptrdiff_t glimpse_cols = 6; + + Tensor<float, 4> input(depth, rows, cols, batch); + input.setRandom(); + + std::vector<IndexPair<float>> offsets; + offsets.resize(batch); + for (int i = 0; i < batch; ++i) { + offsets[i].first = (-5 + i) / 10.0f; + offsets[i].second = (5 - i) / 10.0f; + } + + Tensor<float, 4> result(depth, glimpse_rows, glimpse_cols, batch); + result = ExtractGlimpses(input, glimpse_rows, glimpse_cols, offsets); + + for (int b = 0; b < batch; ++b) { + for (int c = 0; c < glimpse_cols; ++c) { + ptrdiff_t source_c = + c + ((1.0f + offsets[b].second) * cols - glimpse_cols) / 2; + for (int r = 0; r < glimpse_rows; ++r) { + ptrdiff_t source_r = + r + ((1.0f + offsets[b].first) * rows - glimpse_rows) / 2; + for (int d = 0; d < depth; ++d) { + EigenApprox(result(d, r, c, b), input(d, source_r, source_c, b)); + } + } + } + } +} + +TEST(EigenAttentionTest, OutOfBoundsGlimpse) { + const ptrdiff_t depth = 3; + const ptrdiff_t batch = 10; + const ptrdiff_t rows = 32; + const ptrdiff_t cols = 48; 
+ const ptrdiff_t glimpse_rows = 8; + const ptrdiff_t glimpse_cols = 6; + + Tensor<float, 4> input(depth, rows, cols, batch); + input.setRandom(); + + std::vector<IndexPair<float>> offsets; + offsets.resize(batch); + for (int i = 0; i < batch; ++i) { + offsets[i].first = (-5 + i) / 2.0f; + offsets[i].second = (5 - i) / 2.0f; + } + + Tensor<float, 4> result(depth, glimpse_rows, glimpse_cols, batch); + result = ExtractGlimpses(input, glimpse_rows, glimpse_cols, offsets); + + for (int b = 0; b < batch; ++b) { + for (int c = 0; c < glimpse_cols; ++c) { + ptrdiff_t source_c = + c + ((1.0f + offsets[b].second) * cols - glimpse_cols) / 2; + if (source_c < glimpse_cols / 2 || source_c >= cols - glimpse_cols / 2) { + continue; + } + for (int r = 0; r < glimpse_rows; ++r) { + ptrdiff_t source_r = + r + ((1.0f + offsets[b].first) * rows - glimpse_rows) / 2; + if (source_r < glimpse_rows / 2 || + source_r >= rows - glimpse_rows / 2) { + continue; + } + for (int d = 0; d < depth; ++d) { + EigenApprox(result(d, r, c, b), input(d, source_r, source_c, b)); + } + } + } + } +} + +} // namespace Eigen diff --git a/tensorflow/core/kernels/eigen_backward_cuboid_convolutions.h b/tensorflow/core/kernels/eigen_backward_cuboid_convolutions.h new file mode 100644 index 0000000000..937a0c5acb --- /dev/null +++ b/tensorflow/core/kernels/eigen_backward_cuboid_convolutions.h @@ -0,0 +1,539 @@ +/* Copyright 2015 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_BACKWARD_CUBOID_CONVOLUTIONS_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_BACKWARD_CUBOID_CONVOLUTIONS_H_ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/kernels/eigen_patch_3d.h" + +namespace Eigen { + +/** CuboidConvolutionBackwardInput + * \ingroup CXX11_NeuralNetworks_Module + * + * \brief Computes the backprop for the input of a 3D convolution. + * + * The output_backward parameter is expected to be a tensor with a rank of 4 or more (channels, depth, height, width, and optionally others) + * The kernel parameter is expected to be a 5D tensor (filters, channels, kernel_depth, kernel_height, kernel_width) + * output_backward and kernel have to be in the same layout. + * + * The dimensions of the result will be filters, depth, height, width (and others if applicable). + * + * It is possible to swap the order of the depth, width and height dimensions provided that the same order is used in the input, the kernel, and the output. + * + * All dimension orders above are given for col-major, and should be reversed for row-major. 
+ */ + +template <typename OutputBackward, typename Kernel> +EIGEN_ALWAYS_INLINE static const typename internal::conditional< + internal::traits<OutputBackward>::Layout == ColMajor, + TensorReshapingOp< + const DSizes<typename internal::traits<OutputBackward>::Index, + internal::traits<OutputBackward>::NumDimensions>, + const TensorContractionOp< + const array< IndexPair<typename internal::traits<OutputBackward>::Index>, 2>, + const TensorReshapingOp< + const DSizes< typename internal::traits<OutputBackward>::Index, 3>, + const TensorReverseOp<const array<bool, 5>, const Kernel> + >, + const TensorReshapingOp< + const DSizes< typename internal::traits<OutputBackward>::Index, 3>, + const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const OutputBackward> + > + > + >, + TensorReshapingOp< + const DSizes<typename internal::traits<OutputBackward>::Index, + internal::traits<OutputBackward>::NumDimensions>, + const TensorContractionOp< + const array< IndexPair<typename internal::traits<OutputBackward>::Index>, 2>, + const TensorReshapingOp< + const DSizes< typename internal::traits<OutputBackward>::Index, 3>, + const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const OutputBackward> + >, + const TensorReshapingOp< + const DSizes<typename internal::traits<OutputBackward>::Index, 3>, + const TensorReverseOp<const array<bool, 5>, const Kernel> + > + > + > +>::type +CuboidConvolutionBackwardInput( + const Kernel& kernel, const OutputBackward& output_backward, + typename internal::traits<OutputBackward>::Index inputPlanes, + typename internal::traits<OutputBackward>::Index inputRows, + typename internal::traits<OutputBackward>::Index inputCols, + const DenseIndex stridePlanes = 1, const DenseIndex strideRows = 1, + const DenseIndex strideCols = 1) { + typedef typename internal::traits<OutputBackward>::Index TensorIndex; + const TensorRef<const Tensor<typename internal::traits<Kernel>::Scalar, internal::traits<Kernel>::NumDimensions, internal::traits<Kernel>::Layout, 
TensorIndex> > kern(kernel); + const TensorRef<const Tensor<typename internal::traits<OutputBackward>::Scalar, internal::traits<OutputBackward>::NumDimensions, internal::traits<OutputBackward>::Layout, TensorIndex> > out(output_backward); + + EIGEN_STATIC_ASSERT(internal::traits<Kernel>::Layout == internal::traits<OutputBackward>::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE); + + static const bool isColMajor = (internal::traits<OutputBackward>::Layout == ColMajor); + + static const int NumDims = internal::traits<OutputBackward>::NumDimensions; + + // Number of filters to apply. This is the same as the output depth of the result + const TensorIndex kernelFilters = isColMajor ? kern.dimensions()[0] : kern.dimensions()[4]; + // Number of channels. This is the same as the input depth. + const TensorIndex kernelChannels = isColMajor ? kern.dimensions()[1] : kern.dimensions()[3]; + const TensorIndex kernelPlanes = isColMajor ? kern.dimensions()[2] : kern.dimensions()[2]; + const TensorIndex kernelRows = isColMajor ? kern.dimensions()[3] : kern.dimensions()[1]; + const TensorIndex kernelCols = isColMajor ? kern.dimensions()[4] : kern.dimensions()[0]; + + const TensorIndex outputPlanes = isColMajor ? out.dimensions()[1] : out.dimensions()[NumDims - 2]; + const TensorIndex outputRows = isColMajor ? out.dimensions()[2] : out.dimensions()[NumDims - 3]; + const TensorIndex outputCols = isColMajor ? out.dimensions()[3] : out.dimensions()[NumDims - 4]; + + TensorIndex forward_pad_z, forward_pad_y, forward_pad_x; + const TensorIndex size_z = ceil(inputPlanes / static_cast<float>(stridePlanes)); + const TensorIndex size_y = ceil(inputRows / static_cast<float>(strideRows)); + const TensorIndex size_x = ceil(inputCols / static_cast<float>(strideCols)); + + // Infer padding type. + if (size_z == outputPlanes && size_y == outputRows && size_x == outputCols) { + // SAME padding. 
+ const TensorIndex dz = size_z * stridePlanes + kernelPlanes - 1 - inputPlanes; + const TensorIndex dy = size_y * strideRows + kernelRows - 1 - inputRows; + const TensorIndex dx = size_x * strideCols + kernelCols - 1 - inputCols; + + forward_pad_z = dz - dz / 2; + forward_pad_y = dy - dy / 2; + forward_pad_x = dx - dx / 2; + } else { + // VALID padding. + forward_pad_z = 0; + forward_pad_y = 0; + forward_pad_x = 0; + } + const TensorIndex padding_ztop = kernelPlanes - 1 - forward_pad_z; + const TensorIndex padding_top = kernelRows - 1 - forward_pad_y; + const TensorIndex padding_left = kernelCols - 1 - forward_pad_x; + + const TensorIndex padding_zbottom = inputPlanes + kernelPlanes - 1 - (outputPlanes - 1) * stridePlanes - 1 - padding_ztop; + const TensorIndex padding_bottom = inputRows + kernelRows - 1 - (outputRows - 1) * strideRows - 1 - padding_top; + const TensorIndex padding_right = inputCols + kernelCols - 1 - (outputCols - 1) * strideCols - 1 - padding_left; + + eigen_assert(padding_ztop >= 0); + eigen_assert(padding_zbottom >= 0); + eigen_assert(padding_top >= 0); + eigen_assert(padding_left >= 0); + eigen_assert(padding_bottom >= 0); + eigen_assert(padding_right >= 0); + + // The kernel has dimensions filters X channels X patch_planes X patch_rows X patch_cols. + // We need to reverse the kernel along the spatial dimensions. 
+ array<bool, 5> kernel_reverse; + if (isColMajor) { + kernel_reverse[0] = false; + kernel_reverse[1] = false; + kernel_reverse[2] = true; + kernel_reverse[3] = true; + kernel_reverse[4] = true; + } else { + kernel_reverse[0] = true; + kernel_reverse[1] = true; + kernel_reverse[2] = true; + kernel_reverse[3] = false; + kernel_reverse[4] = false; + } + + DSizes<TensorIndex, 3> kernel_dims; + if (isColMajor) { + kernel_dims[0] = kernelFilters; + kernel_dims[1] = kernelChannels; + kernel_dims[2] = kernelRows * kernelCols * kernelPlanes; + } else { + kernel_dims[0] = kernelRows * kernelCols * kernelPlanes; + kernel_dims[1] = kernelChannels; + kernel_dims[2] = kernelFilters; + } + + // The output_backward has dimensions out_depth X out_planes X out_rows X out_cols X OTHERS + // When we extract the image patches from output_backward, it will have dimensions: + // out_depth X (patch_planes * patch_rows * patch_cols) X (input_planes * input_rows * input_cols * OTHERS) + DSizes<TensorIndex, 3> pre_contract_dims; + if (isColMajor) { + pre_contract_dims[0] = kernelFilters; + pre_contract_dims[1] = kernelRows * kernelCols * kernelPlanes; + pre_contract_dims[2] = inputRows * inputCols * inputPlanes; + for (int i = 4; i < NumDims; ++i) { + pre_contract_dims[2] *= out.dimension(i); + } + } else { + pre_contract_dims[2] = kernelFilters; + pre_contract_dims[1] = kernelRows * kernelCols * kernelPlanes; + pre_contract_dims[0] = inputRows * inputCols * inputPlanes; + for (int i = 0; i < NumDims - 4; ++i) { + pre_contract_dims[0] *= out.dimension(i); + } + } + + // We will contract along dimensions (0, 2) in kernel and (0, 1) in + // output_backward, if this is col-major, and + // dimensions (0, 2) in kernel and (1, 2) in output_backward, if this row-major. 
+ array<IndexPair<TensorIndex>, 2> contract_dims; + if (isColMajor) { + // col-major: kernel.contract(output.patches) + contract_dims[0] = IndexPair<TensorIndex>(0, 0); + contract_dims[1] = IndexPair<TensorIndex>(2, 1); + } else { + // row-major: output.patches.contract(kernel) + contract_dims[0] = IndexPair<TensorIndex>(1, 0); + contract_dims[1] = IndexPair<TensorIndex>(2, 2); + } + + // Post contraction, the dimensions of the input_backprop is + // channels X input_planes X input_rows X input_cols X OTHERS + DSizes<TensorIndex, NumDims> post_contract_dims; + if (isColMajor) { + post_contract_dims[0] = kernelChannels; + post_contract_dims[1] = inputPlanes; + post_contract_dims[2] = inputRows; + post_contract_dims[3] = inputCols; + for (int i = 4; i < NumDims; ++i) { + post_contract_dims[i] = out.dimension(i); + } + } else { + post_contract_dims[NumDims - 1] = kernelChannels; + post_contract_dims[NumDims - 2] = inputPlanes; + post_contract_dims[NumDims - 3] = inputRows; + post_contract_dims[NumDims - 4] = inputCols; + for (int i = 0; i < NumDims - 4; ++i) { + post_contract_dims[i] = out.dimension(i); + } + } + + DSizes<TensorIndex, NumDims> strides; + for (int i = 0; i < NumDims; i++) { + strides[i] = 1; + } + if (isColMajor) { + strides[1] = stridePlanes; + strides[2] = strideRows; + strides[3] = strideCols; + } else { + strides[NumDims - 2] = stridePlanes; + strides[NumDims - 3] = strideRows; + strides[NumDims - 4] = strideCols; + } + + return choose( + Cond<internal::traits<OutputBackward>::Layout == ColMajor>(), + kernel.reverse(kernel_reverse) + .reshape(kernel_dims) + .contract( + output_backward.extract_volume_patches(kernelPlanes, kernelRows, kernelCols, + 1, 1, 1, stridePlanes, strideRows, strideCols, + padding_ztop, padding_zbottom, + padding_top, padding_bottom, + padding_left, padding_right) + .reshape(pre_contract_dims), + contract_dims) + .reshape(post_contract_dims), + output_backward.extract_volume_patches(kernelPlanes, kernelRows, kernelCols, + 1, 
1, 1, stridePlanes, strideRows, strideCols, + padding_ztop, padding_zbottom, + padding_top, padding_bottom, + padding_left, padding_right) + .reshape(pre_contract_dims) + .contract(kernel.reverse(kernel_reverse).reshape(kernel_dims), + contract_dims) + .reshape(post_contract_dims)); +} + + +/** CuboidConvolutionBackwardKernel + * \ingroup CXX11_NeuralNetworks_Module + * + * \brief Computes the backprop for the filter of a 3D convolution. + * + * The output_backward parameter is expected to be a tensor with a rank of 4 or more (channels, depth, height, width, and optionally others) + * The kernel parameter is expected to be a 4D tensor (filters, channels, kernel_depth, kernel_height, kernel_width) + * output_backward and kernel have to be in the same layout. + * + * The dimensions of the result will be filters, depth, height, width (and others if applicable). + * + * It is possible to swap the order of the depth, width and height dimensions provided that the same order is used in the input, the kernel, and the output. + * + * All dimension orders above are given for col-major, and should be reversed for row-major. 
+ */ +template <typename OutputBackward, typename Input> +EIGEN_ALWAYS_INLINE static const typename internal::conditional< + internal::traits<OutputBackward>::Layout == ColMajor, + const TensorShufflingOp< + const array<typename internal::traits<OutputBackward>::Index, 5>, + const TensorReverseOp< + const array<bool, 5>, + const TensorReshapingOp< + const DSizes<typename internal::traits<OutputBackward>::Index, 5>, + const TensorContractionOp< + const array< IndexPair<typename internal::traits<Input>::Index>, 2>, + const TensorReshapingOp< + const DSizes<typename internal::traits<Input>::Index, 3>, + const Input>, + const TensorReshapingOp< + const DSizes< typename internal::traits<OutputBackward>::Index, 4>, + const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const OutputBackward> + > + > + > + > + >, + const TensorShufflingOp< + const array<typename internal::traits<OutputBackward>::Index, 5>, + const TensorReverseOp< + const array<bool, 5>, + const TensorReshapingOp< + const DSizes<typename internal::traits<OutputBackward>::Index, 5>, + const TensorContractionOp< + const array< IndexPair<typename internal::traits<Input>::Index>, 2>, + const TensorReshapingOp< + const DSizes< typename internal::traits<OutputBackward>::Index, 4>, + const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const OutputBackward> + >, + const TensorReshapingOp< + const DSizes<typename internal::traits<Input>::Index, 3>, + const Input + > + > + > + > + > +>::type +CuboidConvolutionBackwardKernel( + const Input& input, const OutputBackward& output_backward, + typename internal::traits<Input>::Index kernelPlanes, + typename internal::traits<Input>::Index kernelRows, + typename internal::traits<Input>::Index kernelCols, + const DenseIndex stridePlanes = 1, + const DenseIndex strideRows = 1, + const DenseIndex strideCols = 1) { + typedef typename internal::traits<Input>::Index TensorIndex; + TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, 
internal::traits<Input>::Layout, TensorIndex> > in(input); + TensorRef<Tensor<typename internal::traits<OutputBackward>::Scalar, internal::traits<OutputBackward>::NumDimensions, internal::traits<OutputBackward>::Layout, TensorIndex> > out(output_backward); + + EIGEN_STATIC_ASSERT(internal::traits<Input>::Layout == internal::traits<OutputBackward>::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE); + + static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor); + + static const int NumDims = internal::traits<Input>::NumDimensions; + EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == internal::traits<OutputBackward>::NumDimensions, YOU_MADE_A_PROGRAMMING_MISTAKE); + + const TensorIndex inputPlanes = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2); + const TensorIndex inputRows = isColMajor ? in.dimension(2) : in.dimension(NumDims - 3); + const TensorIndex inputCols = isColMajor ? in.dimension(3) : in.dimension(NumDims - 4); + + const TensorIndex outputPlanes = isColMajor ? out.dimension(1) : out.dimension(NumDims - 2); + const TensorIndex outputRows = isColMajor ? out.dimension(2) : out.dimension(NumDims - 3); + const TensorIndex outputCols = isColMajor ? out.dimension(3) : out.dimension(NumDims - 4); + + const TensorIndex kernelFilters = isColMajor ? out.dimension(0) : out.dimension(NumDims - 1); + const TensorIndex kernelChannels = isColMajor ? in.dimension(0) : in.dimension(NumDims - 1); + + TensorIndex forward_pad_z, forward_pad_y, forward_pad_x; + const TensorIndex size_z = ceil(inputPlanes / static_cast<float>(stridePlanes)); + const TensorIndex size_y = ceil(inputRows / static_cast<float>(strideRows)); + const TensorIndex size_x = ceil(inputCols / static_cast<float>(strideCols)); + + // Infer padding type. + if (size_z == outputPlanes && size_y == outputRows && size_x == outputCols) { + // SAME padding. 
+ const TensorIndex dz = size_z * stridePlanes + kernelPlanes - 1 - inputPlanes; + const TensorIndex dy = size_y * strideRows + kernelRows - 1 - inputRows; + const TensorIndex dx = size_x * strideCols + kernelCols - 1 - inputCols; + + forward_pad_z = dz - dz / 2; + forward_pad_y = dy - dy / 2; + forward_pad_x = dx - dx / 2; + } else { + // VALID padding. + forward_pad_z = 0; + forward_pad_y = 0; + forward_pad_x = 0; + } + + const TensorIndex padding_ztop = kernelPlanes - 1 - forward_pad_z; + const TensorIndex padding_top = kernelRows - 1 - forward_pad_y; + const TensorIndex padding_left = kernelCols - 1 - forward_pad_x; + + const TensorIndex padding_zbottom = inputPlanes + kernelPlanes - 1 - (outputPlanes - 1) * stridePlanes - 1 - padding_ztop; + const TensorIndex padding_bottom = inputRows + kernelRows - 1 - (outputRows - 1) * strideRows - 1 - padding_top; + const TensorIndex padding_right = inputCols + kernelCols - 1 - (outputCols - 1) * strideCols - 1 - padding_left; + + eigen_assert(padding_ztop >= 0); + eigen_assert(padding_zbottom >= 0); + eigen_assert(padding_top >= 0); + eigen_assert(padding_left >= 0); + eigen_assert(padding_bottom >= 0); + eigen_assert(padding_right >= 0); + + // The output_backward has dimensions out_depth X out_plaens X out_rows X out_cols X OTHERS + // When we extract the image patches from output_backward (with input as the + // kernel), it will have dimensions + // (out_depth) X (input_planes * input_rows * input_cols) X (kernel_planes * kernel_rows * kernel_cols) X OTHERS + DSizes<TensorIndex, 4> pre_contract_dims; + if (isColMajor) { + pre_contract_dims[0] = kernelFilters; + pre_contract_dims[1] = inputRows * inputCols * inputPlanes; + pre_contract_dims[2] = kernelRows * kernelCols * kernelPlanes; + pre_contract_dims[3] = 1; + for (int i = 4; i < NumDims; ++i) { + pre_contract_dims[3] *= out.dimension(i); + } + } else { + pre_contract_dims[3] = kernelFilters; + pre_contract_dims[2] = inputRows * inputCols * inputPlanes; + 
pre_contract_dims[1] = kernelRows * kernelCols * kernelPlanes; + pre_contract_dims[0] = 1; + for (int i = 0; i < NumDims - 4; ++i) { + pre_contract_dims[0] *= out.dimension(i); + } + } + + // The input has dimensions in_depth X (input_planes * input_rows * input_cols) X OTHERS + DSizes<TensorIndex, 3> input_dims; + if (isColMajor) { + input_dims[0] = kernelChannels; + input_dims[1] = inputRows * inputCols * inputPlanes; + input_dims[2] = 1; + for (int i = 4; i < NumDims; ++i) { + input_dims[2] *= in.dimension(i); + } + eigen_assert(input_dims[2] == pre_contract_dims[3]); + } else { + input_dims[2] = kernelChannels; + input_dims[1] = inputRows * inputCols * inputPlanes; + input_dims[0] = 1; + for (int i = 0; i < NumDims - 4; ++i) { + input_dims[0] *= in.dimension(i); + } + eigen_assert(input_dims[0] == pre_contract_dims[0]); + } + + // We will contract along dimensions (1, 2) in in and (1, 3) in out, if + // this is col-major. + // For row-major, it's dimensions (0, 1) in in and (0, 2) in out. + array<IndexPair<TensorIndex>, 2> contract_dims; + if (isColMajor) { + // col-major: in.contract(output.patches) + contract_dims[0] = IndexPair<TensorIndex>(1, 1); + contract_dims[1] = IndexPair<TensorIndex>(2, 3); + } else { + // row-major: output.patches.contract(in) + contract_dims[0] = IndexPair<TensorIndex>(0, 0); + contract_dims[1] = IndexPair<TensorIndex>(2, 1); + } + + // After the contraction, the kernel will have dimension + // in_depth X out_depth X kernel_patches X kernel_rows X kernel_cols + // We will need to shuffle the first two dimensions and reverse the spatial dimensions. + // The end shape is: + // out_depth X in_shape X kernel_planes X kernel_rows X kernel_cols + + // This is the shape of the kernel *before* the shuffling. 
+ DSizes<TensorIndex, 5> kernel_dims; + if (isColMajor) { + kernel_dims[0] = kernelChannels; + kernel_dims[1] = kernelFilters; + kernel_dims[2] = kernelPlanes; + kernel_dims[3] = kernelRows; + kernel_dims[4] = kernelCols; + } else { + kernel_dims[0] = kernelCols; + kernel_dims[1] = kernelRows; + kernel_dims[2] = kernelPlanes; + kernel_dims[3] = kernelFilters; + kernel_dims[4] = kernelChannels; + } + + // Flip filters and channels. + array<TensorIndex, 5> kernel_shuffle; + if (isColMajor) { + kernel_shuffle[0] = 1; + kernel_shuffle[1] = 0; + kernel_shuffle[2] = 2; + kernel_shuffle[3] = 3; + kernel_shuffle[4] = 4; + } else { + kernel_shuffle[0] = 0; + kernel_shuffle[1] = 1; + kernel_shuffle[2] = 2; + kernel_shuffle[3] = 4; + kernel_shuffle[4] = 3; + } + + // Reverse the spatial dimensions. + array<bool, 5> kernel_reverse; + if (isColMajor) { + kernel_reverse[0] = false; + kernel_reverse[1] = false; + kernel_reverse[2] = true; + kernel_reverse[3] = true; + kernel_reverse[4] = true; + } else { + kernel_reverse[0] = true; + kernel_reverse[1] = true; + kernel_reverse[2] = true; + kernel_reverse[3] = false; + kernel_reverse[4] = false; + } + + DSizes<TensorIndex, NumDims> strides; + for (int i = 0; i < NumDims; i++) { + strides[i] = 1; + } + if (isColMajor) { + strides[1] = stridePlanes; + strides[2] = strideRows; + strides[3] = strideCols; + } else { + strides[NumDims - 2] = stridePlanes; + strides[NumDims - 3] = strideRows; + strides[NumDims - 4] = strideCols; + } + return choose( + Cond<internal::traits<Input>::Layout == ColMajor>(), + input.reshape(input_dims) + .contract( + output_backward.extract_volume_patches( + inputPlanes, inputRows, inputCols, 1, + 1, 1, stridePlanes, strideRows, strideCols, + + padding_ztop, padding_zbottom, padding_top, + padding_bottom, padding_left, padding_right) + .reshape(pre_contract_dims), + contract_dims) + .reshape(kernel_dims) + .reverse(kernel_reverse) + .shuffle(kernel_shuffle), + output_backward.extract_volume_patches( + 
inputPlanes, inputRows, inputCols, 1, 1, 1, + stridePlanes, strideRows, strideCols, padding_ztop, + padding_zbottom, padding_top, padding_bottom, + padding_left, padding_right) + .reshape(pre_contract_dims) + .contract(input.reshape(input_dims), contract_dims) + .reshape(kernel_dims) + .reverse(kernel_reverse) + .shuffle(kernel_shuffle)); +} + +} // end namespace Eigen + +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_BACKWARD_CUBOID_CONVOLUTIONS_H_ diff --git a/tensorflow/core/kernels/eigen_backward_spatial_convolutions.h b/tensorflow/core/kernels/eigen_backward_spatial_convolutions.h new file mode 100644 index 0000000000..7a5a94bb6f --- /dev/null +++ b/tensorflow/core/kernels/eigen_backward_spatial_convolutions.h @@ -0,0 +1,359 @@ +/* Copyright 2015 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_BACKWARD_SPATIAL_CONVOLUTIONS_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_BACKWARD_SPATIAL_CONVOLUTIONS_H_ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace Eigen { + +/** SpatialConvolutionBackwardInput + * \ingroup CXX11_NeuralNetworks_Module + * + * \brief Computes the backprop for the input of a 2D convolution. 
+ * + * The output_backward parameter is expected to be a tensor with a rank of 3 or more (channels, height, width, and optionally others) + * The kernel parameter is expected to be a 4D tensor (filters, channels, kernel_height, kernel_width) + * The output_backward and the kernel must both be in col-major layout. The result will also be in col-major layout. + * + * If in_stride > 1, then applies convolution with holes (aka atrous convolution), sampling every in_stride input pixels. + * + * The result can be assigned to a tensor of rank equal to the rank of the output_backward. The dimensions of the result will be filters, height, width (and others if applicable). + * + * It is possible to swap the order of the width and height dimensions provided that the same order is used in the input, the kernel, and the output. + * + */ + +template <typename OutputBackward, typename Kernel> +EIGEN_ALWAYS_INLINE +static const typename internal::conditional< + internal::traits<OutputBackward>::Layout == ColMajor, + TensorReshapingOp<const DSizes<typename internal::traits<OutputBackward>::Index, internal::traits<OutputBackward>::NumDimensions>, const TensorContractionOp<const array<IndexPair<typename internal::traits<OutputBackward>::Index>, 2>, const TensorReshapingOp<const DSizes<typename internal::traits<OutputBackward>::Index, 3>, const TensorReverseOp<const array<bool, 4>, const Kernel> >, const TensorReshapingOp<const DSizes<typename internal::traits<OutputBackward>::Index, 3>, const TensorImagePatchOp<Dynamic, Dynamic, const OutputBackward> > > >, + TensorReshapingOp<const DSizes<typename internal::traits<OutputBackward>::Index, internal::traits<OutputBackward>::NumDimensions>, const TensorContractionOp<const array<IndexPair<typename internal::traits<OutputBackward>::Index>, 2>, const TensorReshapingOp<const DSizes<typename internal::traits<OutputBackward>::Index, 3>, const TensorImagePatchOp<Dynamic, Dynamic, const OutputBackward> >, const TensorReshapingOp<const 
DSizes<typename internal::traits<OutputBackward>::Index, 3>, const TensorReverseOp<const array<bool, 4>, const Kernel> > > > >::type +SpatialConvolutionBackwardInput(const Kernel& kernel, const OutputBackward& output_backward, typename internal::traits<OutputBackward>::Index inputRows, typename internal::traits<OutputBackward>::Index inputCols, const DenseIndex stride = 1, const DenseIndex in_stride = 1) { + + typedef typename internal::traits<OutputBackward>::Index TensorIndex; + TensorRef<Tensor<typename internal::traits<Kernel>::Scalar, internal::traits<Kernel>::NumDimensions, internal::traits<Kernel>::Layout, TensorIndex> > kern(kernel); + TensorRef<Tensor<typename internal::traits<OutputBackward>::Scalar, internal::traits<OutputBackward>::NumDimensions, internal::traits<OutputBackward>::Layout, TensorIndex> > out(output_backward); + + EIGEN_STATIC_ASSERT(internal::traits<Kernel>::Layout == internal::traits<OutputBackward>::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE); + + static const bool isColMajor = (internal::traits<OutputBackward>::Layout == ColMajor); + + static const int NumDims = internal::traits<OutputBackward>::NumDimensions; + + // Number of filters to apply. This is the same as the output depth of the result + const TensorIndex kernelFilters = isColMajor ? kern.dimensions()[0] : kern.dimensions()[3]; + // Number of channels. This is the same as the input depth. + const TensorIndex kernelChannels = isColMajor ? kern.dimensions()[1] : kern.dimensions()[2]; + const TensorIndex kernelRows = isColMajor ? kern.dimensions()[2] : kern.dimensions()[1]; + const TensorIndex kernelCols = isColMajor ? 
kern.dimensions()[3] : kern.dimensions()[0]; + + // This is the effective kernel size, taking into account the (in_stride - 1) zero-values + // inserted between consecutive kernel elements in atrous convolution + const TensorIndex kernelRowsEff = kernelRows + (kernelRows - 1) * (in_stride - 1); + const TensorIndex kernelColsEff = kernelCols + (kernelCols - 1) * (in_stride - 1); + + const TensorIndex outputRows = isColMajor ? output_backward.dimension(1) : output_backward.dimension(NumDims - 2); + const TensorIndex outputCols = isColMajor ? output_backward.dimension(2) : output_backward.dimension(NumDims - 3); + + // Computing the forward padding + const TensorIndex forward_pad_top = ((outputRows - 1) * stride + kernelRowsEff - inputRows) / 2; + const TensorIndex forward_pad_left = ((outputCols - 1) * stride + kernelColsEff - inputCols) / 2; + + const TensorIndex padding_top = kernelRowsEff - 1 - forward_pad_top; + const TensorIndex padding_left = kernelColsEff - 1 - forward_pad_left; + const TensorIndex padding_bottom = inputRows + kernelRowsEff - 1 - (outputRows - 1) * stride - 1 - padding_top; + const TensorIndex padding_right = inputCols + kernelColsEff - 1 - (outputCols - 1) * stride - 1 - padding_left; + + eigen_assert(padding_top >= 0); + eigen_assert(padding_left >= 0); + eigen_assert(padding_bottom >= 0); + eigen_assert(padding_right >= 0); + + // The kernel has dimensions filters X channels X patch_rows X patch_cols + // We need to reverse the kernel along dimensions corresponding to rows and + // cols. + // TODO(yangke): we can make things slightly faster by collapsing the dimensions + // where we don't reverse. Try that once we have a faster compiler. 
+ array<bool, 4> kernel_reverse; + if (isColMajor) { + kernel_reverse[0] = false; + kernel_reverse[1] = false; + kernel_reverse[2] = true; + kernel_reverse[3] = true; + } else { + kernel_reverse[0] = true; + kernel_reverse[1] = true; + kernel_reverse[2] = false; + kernel_reverse[3] = false; + } + + DSizes<TensorIndex, 3> kernel_dims; + if (isColMajor) { + kernel_dims[0] = kernelFilters; + kernel_dims[1] = kernelChannels; + kernel_dims[2] = kernelRows * kernelCols; + } else { + kernel_dims[0] = kernelRows * kernelCols; + kernel_dims[1] = kernelChannels; + kernel_dims[2] = kernelFilters; + } + + // The output_backward has dimensions out_depth X out_rows X out_cols X OTHERS + // When we extract the image patches from output_backward, it will have dimensions + // out_depth X (patch_rows * patch_cols) X (input_rows * input_cols * OTHERS) + DSizes<TensorIndex, 3> pre_contract_dims; + if (isColMajor) { + pre_contract_dims[0] = kernelFilters; + pre_contract_dims[1] = kernelRows * kernelCols; + pre_contract_dims[2] = inputRows * inputCols; + for (int i = 3; i < NumDims; ++i) { + pre_contract_dims[2] *= out.dimension(i); + } + } else { + pre_contract_dims[2] = kernelFilters; + pre_contract_dims[1] = kernelRows * kernelCols; + pre_contract_dims[0] = inputRows * inputCols; + for (int i = 0; i < NumDims - 3; ++i) { + pre_contract_dims[0] *= out.dimension(i); + } + } + + // We will contract along dimensions (0, 2) in kernel and (0, 1) in + // output_backward, if this is col-major, and + // dimensions (0, 2) in kernel and (1, 2) in output_backward, if this row-major. 
+ array<IndexPair<TensorIndex>, 2> contract_dims; + if (isColMajor) { + // col-major: kernel.contract(output.patches) + contract_dims[0] = IndexPair<TensorIndex>(0, 0); + contract_dims[1] = IndexPair<TensorIndex>(2, 1); + } else { + // row-major: output.patches.contract(kernel) + contract_dims[0] = IndexPair<TensorIndex>(1, 0); + contract_dims[1] = IndexPair<TensorIndex>(2, 2); + } + + // Post contraction, the dimensions of the input_backprop is + // channels X input_rows X input_cols X OTHERS + DSizes<TensorIndex, NumDims> post_contract_dims; + if (isColMajor) { + post_contract_dims[0] = kernelChannels; + post_contract_dims[1] = inputRows; + post_contract_dims[2] = inputCols; + for (int i = 3; i < NumDims; ++i) { + post_contract_dims[i] = out.dimension(i); + } + } else { + post_contract_dims[NumDims - 1] = kernelChannels; + post_contract_dims[NumDims - 2] = inputRows; + post_contract_dims[NumDims - 3] = inputCols; + for (int i = 0; i < NumDims - 3; ++i) { + post_contract_dims[i] = out.dimension(i); + } + } + + return choose(Cond<internal::traits<OutputBackward>::Layout == ColMajor>(), + kernel.reverse(kernel_reverse).reshape(kernel_dims).contract(output_backward.extract_image_patches(kernelRows, kernelCols, 1, 1, in_stride, in_stride, stride, stride, padding_top, padding_bottom, padding_left, padding_right, 0).reshape(pre_contract_dims), contract_dims).reshape(post_contract_dims), + output_backward.extract_image_patches(kernelRows, kernelCols, 1, 1, in_stride, in_stride, stride, stride, padding_top, padding_bottom, padding_left, padding_right, 0).reshape(pre_contract_dims).contract(kernel.reverse(kernel_reverse).reshape(kernel_dims), contract_dims).reshape(post_contract_dims)); +} + + +/** SpatialConvolutionBackwardKernel + * \ingroup CXX11_NeuralNetworks_Module + * + * \brief Computes the backprop for the filter of a 2D convolution. 
+ * + * The output_backward parameter is expected to be a tensor with a rank of 3 or more (channels, height, width, and optionally others) + * The kernel parameter is expected to be a 4D tensor (filters, channels, kernel_height, kernel_width) + * The output_backward and the kernel must both be in col-major layout. The result will also be in col-major layout. + * + * If in_stride > 1, then applies convolution with holes (aka atrous convolution), sampling every in_stride input pixels. + * + * The result can be assigned to a tensor of rank equal to the rank of the output_backward. The dimensions of the result will be filters, height, width (and others if applicable). + * + * It is possible to swap the order of the width and height dimensions provided that the same order is used in the input, the kernel, and the output. + * + */ +// TODO(gpapan): Resolve a bug in TensorContractionInputMapper at SpatialConvolutions.h that yangke circumvented by using .reshape().reshape(). +// This can significantly accelerate SpatialConvolutionBackwardKernel. 
+ +template <typename OutputBackward, typename Input> +EIGEN_ALWAYS_INLINE +static const typename internal::conditional< + internal::traits<OutputBackward>::Layout == ColMajor, + const TensorShufflingOp<const array<typename internal::traits<OutputBackward>::Index, 4>, const TensorReverseOp<const array<bool, 4>, const TensorReshapingOp<const DSizes<typename internal::traits<OutputBackward>::Index, 4>, const TensorContractionOp<const array<IndexPair<typename internal::traits<Input>::Index>, 2>, const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 3>, const Input>, const TensorReshapingOp<const DSizes<typename internal::traits<OutputBackward>::Index, 4>, const TensorReshapingOp<const DSizes<typename internal::traits<OutputBackward>::Index, 4>, const TensorImagePatchOp<Dynamic, Dynamic, const OutputBackward> > > > > > >, + const TensorShufflingOp<const array<typename internal::traits<OutputBackward>::Index, 4>, const TensorReverseOp<const array<bool, 4>, const TensorReshapingOp<const DSizes<typename internal::traits<OutputBackward>::Index, 4>, const TensorContractionOp<const array<IndexPair<typename internal::traits<Input>::Index>, 2>, const TensorReshapingOp<const DSizes<typename internal::traits<OutputBackward>::Index, 4>, const TensorReshapingOp<const DSizes<typename internal::traits<OutputBackward>::Index, 4>, const TensorImagePatchOp<Dynamic, Dynamic, const OutputBackward> > >, const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 3>, const Input> > > > > >::type +SpatialConvolutionBackwardKernel(const Input& input, const OutputBackward& output_backward, typename internal::traits<Input>::Index kernelRows, typename internal::traits<Input>::Index kernelCols, const DenseIndex stride = 1, const DenseIndex in_stride = 1) { + + typedef typename internal::traits<Input>::Index TensorIndex; + TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, 
TensorIndex> > in(input); + TensorRef<Tensor<typename internal::traits<OutputBackward>::Scalar, internal::traits<OutputBackward>::NumDimensions, internal::traits<OutputBackward>::Layout, TensorIndex> > out(output_backward); + + EIGEN_STATIC_ASSERT(internal::traits<Input>::Layout == internal::traits<OutputBackward>::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE); + + // stride and in_stride cannot both be larger than 1 + eigen_assert(!(stride > 1 && in_stride > 1)); + + static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor); + + static const int NumDims = internal::traits<Input>::NumDimensions; + EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == internal::traits<OutputBackward>::NumDimensions, YOU_MADE_A_PROGRAMMING_MISTAKE); + + const TensorIndex inputRows = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2); + const TensorIndex inputCols = isColMajor ? in.dimension(2) : in.dimension(NumDims - 3); + + const TensorIndex outputRows = isColMajor ? output_backward.dimension(1) : output_backward.dimension(NumDims - 2); + const TensorIndex outputCols = isColMajor ? output_backward.dimension(2) : output_backward.dimension(NumDims - 3); + + // Number of filters to apply. This is the same as the output depth of the result + const TensorIndex kernelFilters = isColMajor ? out.dimensions()[0] : out.dimensions()[NumDims - 1]; + + // Number of channels. This is the same as the input depth. + const TensorIndex kernelChannels = isColMajor ? 
in.dimensions()[0] : in.dimensions()[NumDims - 1]; + + // This is the effective kernel size, taking into account the (in_stride - 1) zero-values + // inserted between consecutive kernel elements in atrous convolution + const TensorIndex kernelRowsEff = kernelRows + (kernelRows - 1) * (in_stride - 1); + const TensorIndex kernelColsEff = kernelCols + (kernelCols - 1) * (in_stride - 1); + + // Computing the forward padding + const TensorIndex forward_pad_top = ((outputRows - 1) * stride + kernelRowsEff - inputRows) / 2; + const TensorIndex forward_pad_left = ((outputCols - 1) * stride + kernelColsEff - inputCols) / 2; + + // TODO: factor out the padding computation. + const TensorIndex padding_top = kernelRowsEff - 1 - forward_pad_top; + const TensorIndex padding_left = kernelColsEff - 1 - forward_pad_left; + const TensorIndex padding_bottom = inputRows + kernelRowsEff - 1 - (outputRows - 1) * stride - 1 - padding_top; + const TensorIndex padding_right = inputCols + kernelColsEff - 1 - (outputCols - 1) * stride - 1 - padding_left; + + eigen_assert(padding_top >= 0); + eigen_assert(padding_left >= 0); + eigen_assert(padding_bottom >= 0); + eigen_assert(padding_right >= 0); + + // The output_backward has dimensions out_depth X out_rows X out_cols X OTHERS + // When we extract the image patches from output_backward (with input as the + // kernel), it will have dimensions + // (out_depth) X (input_rows * input_cols) X (kernel_rows * kernel_cols) X OTHERS + DSizes<TensorIndex, 4> pre_contract_dims; + if (isColMajor) { + pre_contract_dims[0] = kernelFilters; + pre_contract_dims[1] = inputRows * inputCols; + pre_contract_dims[2] = kernelRows * kernelCols; + pre_contract_dims[3] = 1; + for (int i = 3; i < NumDims; ++i) { + pre_contract_dims[3] *= out.dimension(i); + } + } else { + pre_contract_dims[3] = kernelFilters; + pre_contract_dims[2] = inputRows * inputCols; + pre_contract_dims[1] = kernelRows * kernelCols; + pre_contract_dims[0] = 1; + for (int i = 0; i < NumDims - 3; 
++i) { + pre_contract_dims[0] *= out.dimension(i); + } + } + + // The input has dimensions in_depth X (input_rows * input_cols) X OTHERS + DSizes<TensorIndex, 3> input_dims; + if (isColMajor) { + input_dims[0] = kernelChannels; + input_dims[1] = inputRows * inputCols; + input_dims[2] = 1; + for (int i = 3; i < NumDims; ++i) { + input_dims[2] *= in.dimension(i); + } + eigen_assert(input_dims[2] == pre_contract_dims[3]); + } else { + input_dims[2] = kernelChannels; + input_dims[1] = inputRows * inputCols; + input_dims[0] = 1; + for (int i = 0; i < NumDims - 3; ++i) { + input_dims[0] *= in.dimension(i); + } + eigen_assert(input_dims[0] == pre_contract_dims[0]); + } + + // We will contract along dimensions (1, 2) in in and (1, 3) in out, if + // this is col-major. + // For row-major, it's dimensions (0, 1) in in and (0, 2) in out. + array<IndexPair<TensorIndex>, 2> contract_dims; + if (isColMajor) { + // col-major: in.contract(output.patches) + contract_dims[0] = IndexPair<TensorIndex>(1, 1); + contract_dims[1] = IndexPair<TensorIndex>(2, 3); + } else { + // row-major: output.patches.contract(in) + contract_dims[0] = IndexPair<TensorIndex>(0, 0); + contract_dims[1] = IndexPair<TensorIndex>(2, 1); + } + + // After the contraction, the kernel will have dimension + // in_depth X out_depth X kernel_rows X kernel_cols + // We will need to shuffle the first two dimensions and reverse the latter + // two dimensions. + // The end shape is + // out_depth X in_shape X kernel_rows X kernel_cols + + // This is the shape of the kernel *before* the shuffling. 
+ DSizes<TensorIndex, 4> kernel_dims; + if (isColMajor) { + kernel_dims[0] = kernelChannels; + kernel_dims[1] = kernelFilters; + kernel_dims[2] = kernelRows; + kernel_dims[3] = kernelCols; + } else { + kernel_dims[0] = kernelCols; + kernel_dims[1] = kernelRows; + kernel_dims[2] = kernelFilters; + kernel_dims[3] = kernelChannels; + } + + array<TensorIndex, 4> kernel_shuffle; + if (isColMajor) { + kernel_shuffle[0] = 1; + kernel_shuffle[1] = 0; + kernel_shuffle[2] = 2; + kernel_shuffle[3] = 3; + } else { + kernel_shuffle[0] = 0; + kernel_shuffle[1] = 1; + kernel_shuffle[2] = 3; + kernel_shuffle[3] = 2; + } + + array<bool, 4> kernel_reverse; + if (isColMajor) { + kernel_reverse[0] = false; + kernel_reverse[1] = false; + kernel_reverse[2] = true; + kernel_reverse[3] = true; + } else { + kernel_reverse[0] = true; + kernel_reverse[1] = true; + kernel_reverse[2] = false; + kernel_reverse[3] = false; + } + + return choose(Cond<internal::traits<Input>::Layout == ColMajor>(), + input.reshape(input_dims).contract(output_backward.extract_image_patches(inputRows, inputCols, in_stride, in_stride, 1, 1, stride, stride, padding_top, padding_bottom, padding_left, padding_right, 0).reshape(pre_contract_dims).reshape(pre_contract_dims), contract_dims).reshape(kernel_dims).reverse(kernel_reverse).shuffle(kernel_shuffle), + output_backward.extract_image_patches(inputRows, inputCols, in_stride, in_stride, 1, 1, stride, stride, padding_top, padding_bottom, padding_left, padding_right, 0).reshape(pre_contract_dims).reshape(pre_contract_dims).contract(input.reshape(input_dims), contract_dims).reshape(kernel_dims).reverse(kernel_reverse).shuffle(kernel_shuffle)); +} + +} // end namespace Eigen + +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_BACKWARD_SPATIAL_CONVOLUTIONS_H_ diff --git a/tensorflow/core/kernels/eigen_backward_spatial_convolutions_test.cc b/tensorflow/core/kernels/eigen_backward_spatial_convolutions_test.cc new file mode 100644 index 0000000000..9e77a71cb5 --- /dev/null 
+++ b/tensorflow/core/kernels/eigen_backward_spatial_convolutions_test.cc @@ -0,0 +1,1959 @@ +/* Copyright 2015 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/kernels/eigen_backward_spatial_convolutions.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/eigen_backward_cuboid_convolutions.h" +#include "tensorflow/core/platform/test.h" + +namespace Eigen { + +namespace { +void EigenApprox(float a, float b) { + ASSERT_TRUE(std::abs(a - b) <= std::min(std::abs(a), std::abs(b)) * 1e-3); +} +static int ceil_div(int a, int b) { return (a + b - 1) / b; } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_simple_spatial_convolution_backward_input_valid) { + const int input_depth = 2; + const int input_rows = 3; + const int input_cols = 4; + const int output_depth = 5; + const int patch_rows = 2; + const int patch_cols = 2; + const int output_rows = input_rows - patch_rows + 1; + const int output_cols = input_cols - patch_cols + 1; + + Tensor<float, 3> input_backward(input_depth, input_rows, input_cols); + Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols); + Tensor<float, 3> output_backward(output_depth, output_rows, output_cols); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + input_backward.setRandom(); + 
+ input_backward = SpatialConvolutionBackwardInput(kernel, output_backward, + input_rows, input_cols, 1); + + EXPECT_EQ(input_backward.dimension(0), input_depth); + EXPECT_EQ(input_backward.dimension(1), input_rows); + EXPECT_EQ(input_backward.dimension(2), input_cols); + + for (int id = 0; id < input_depth; ++id) { + for (int i = 0; i < input_rows; ++i) { + for (int j = 0; j < input_cols; ++j) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int od = 0; od < output_depth; ++od) { + int output_i = i - r; + int output_j = j - c; + if (output_i >= 0 && output_i < output_rows && output_j >= 0 && + output_j < output_cols) { + expected += output_backward(od, output_i, output_j) * + kernel(od, id, r, c); + } + } + } + } + EigenApprox(input_backward(id, i, j), expected); + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_simple_spatial_convolution_backward_input_valid_row_major) { + const int input_depth = 2; + const int input_rows = 3; + const int input_cols = 4; + const int output_depth = 5; + const int patch_rows = 2; + const int patch_cols = 2; + const int output_rows = input_rows - patch_rows + 1; + const int output_cols = input_cols - patch_cols + 1; + + Tensor<float, 3, RowMajor> input_backward(input_cols, input_rows, + input_depth); + Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth, + output_depth); + Tensor<float, 3, RowMajor> output_backward(output_cols, output_rows, + output_depth); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + input_backward.setRandom(); + + input_backward = SpatialConvolutionBackwardInput(kernel, output_backward, + input_rows, input_cols, 1); + + EXPECT_EQ(input_backward.dimension(0), input_cols); + EXPECT_EQ(input_backward.dimension(1), input_rows); + EXPECT_EQ(input_backward.dimension(2), input_depth); + + for (int id = 0; id < input_depth; ++id) { 
+ for (int i = 0; i < input_rows; ++i) { + for (int j = 0; j < input_cols; ++j) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int od = 0; od < output_depth; ++od) { + int output_i = i - r; + int output_j = j - c; + if (output_i >= 0 && output_i < output_rows && output_j >= 0 && + output_j < output_cols) { + expected += output_backward(output_j, output_i, od) * + kernel(c, r, id, od); + } + } + } + } + EigenApprox(input_backward(j, i, id), expected); + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_simple_cuboid_convolution_backward_input_valid) { + const int input_depth = 2; + const int input_planes = 5; + const int input_rows = 3; + const int input_cols = 4; + const int patch_rows = 2; + const int patch_cols = 2; + const int patch_planes = 2; + const int output_rows = input_rows - patch_rows + 1; + const int output_cols = input_cols - patch_cols + 1; + const int output_planes = input_planes - patch_planes + 1; + const int output_depth = 5; + + Tensor<float, 4> input_backward(input_depth, input_planes, input_rows, + input_cols); + Tensor<float, 5> kernel(output_depth, input_depth, patch_planes, patch_rows, + patch_cols); + Tensor<float, 4> output_backward(output_depth, output_planes, output_rows, + output_cols); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + input_backward.setRandom(); + + input_backward = CuboidConvolutionBackwardInput( + kernel, output_backward, input_planes, input_rows, input_cols); + + EXPECT_EQ(input_backward.dimension(3), input_cols); + EXPECT_EQ(input_backward.dimension(2), input_rows); + EXPECT_EQ(input_backward.dimension(1), input_planes); + EXPECT_EQ(input_backward.dimension(0), input_depth); + + for (int id = 0; id < input_depth; ++id) { + for (int i = 0; i < input_planes; ++i) { + for (int j = 0; j < input_rows; ++j) { + for (int k = 0; k < input_cols; ++k) { + 
float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int p = 0; p < patch_planes; ++p) { + for (int od = 0; od < output_depth; ++od) { + int output_j = j - r; + int output_k = k - c; + int output_i = i - p; + if (output_i >= 0 && output_i < output_planes && + output_j >= 0 && output_j < output_rows && + output_k >= 0 && output_k < output_cols) { + expected += + output_backward(od, output_i, output_j, output_k) * + kernel(od, id, p, r, c); + } + } + } + } + } + EigenApprox(input_backward(id, i, j, k), expected); + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_simple_cuboid_convolution_backward_input_valid_row_major) { + const int input_depth = 2; + const int input_planes = 5; + const int input_rows = 3; + const int input_cols = 4; + const int patch_rows = 2; + const int patch_cols = 2; + const int patch_planes = 2; + const int output_rows = input_rows - patch_rows + 1; + const int output_cols = input_cols - patch_cols + 1; + const int output_planes = input_planes - patch_planes + 1; + const int output_depth = 5; + + Tensor<float, 4, RowMajor> input_backward(input_cols, input_rows, + input_planes, input_depth); + Tensor<float, 5, RowMajor> kernel(patch_cols, patch_rows, patch_planes, + input_depth, output_depth); + Tensor<float, 4, RowMajor> output_backward(output_cols, output_rows, + output_planes, output_depth); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + input_backward.setRandom(); + + input_backward = CuboidConvolutionBackwardInput( + kernel, output_backward, input_planes, input_rows, input_cols); + + EXPECT_EQ(input_backward.dimension(0), input_cols); + EXPECT_EQ(input_backward.dimension(1), input_rows); + EXPECT_EQ(input_backward.dimension(2), input_planes); + EXPECT_EQ(input_backward.dimension(3), input_depth); + + for (int id = 0; id < input_depth; ++id) { + for (int i = 0; i < 
input_planes; ++i) { + for (int j = 0; j < input_rows; ++j) { + for (int k = 0; k < input_cols; ++k) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int p = 0; p < patch_planes; ++p) { + for (int od = 0; od < output_depth; ++od) { + int output_j = j - r; + int output_k = k - c; + int output_i = i - p; + if (output_i >= 0 && output_i < output_planes && + output_j >= 0 && output_j < output_rows && + output_k >= 0 && output_k < output_cols) { + expected += + output_backward(output_k, output_j, output_i, od) * + kernel(c, r, p, id, od); + } + } + } + } + } + EigenApprox(input_backward(k, j, i, id), expected); + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_simple_spatial_convolution_backward_input_same) { + const int input_depth = 2; + const int input_rows = 7; + const int input_cols = 9; + const int output_depth = 3; + const int patch_rows = 4; + const int patch_cols = 4; + const int output_rows = input_rows; + const int output_cols = input_cols; + + Tensor<float, 3> input_backward(input_depth, input_rows, input_cols); + Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols); + Tensor<float, 3> output_backward(output_depth, output_rows, output_cols); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + + input_backward = SpatialConvolutionBackwardInput(kernel, output_backward, + input_rows, input_cols, 1); + + EXPECT_EQ(input_backward.dimension(0), input_depth); + EXPECT_EQ(input_backward.dimension(1), input_rows); + EXPECT_EQ(input_backward.dimension(2), input_cols); + + for (int id = 0; id < input_depth; ++id) { + for (int i = 0; i < input_rows; ++i) { + for (int j = 0; j < input_cols; ++j) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int od = 0; od < output_depth; ++od) { + int output_i = i - r + 
(patch_rows - 1) / 2; + int output_j = j - c + (patch_cols - 1) / 2; + if (output_i >= 0 && output_i < output_rows && output_j >= 0 && + output_j < output_cols) { + expected += output_backward(od, output_i, output_j) * + kernel(od, id, r, c); + } + } + } + } + EigenApprox(input_backward(id, i, j), expected); + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_simple_spatial_convolution_backward_input_same_row_major) { + const int input_depth = 2; + const int input_rows = 7; + const int input_cols = 9; + const int output_depth = 3; + const int patch_rows = 4; + const int patch_cols = 4; + const int output_rows = input_rows; + const int output_cols = input_cols; + + Tensor<float, 3, RowMajor> input_backward(input_cols, input_rows, + input_depth); + Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth, + output_depth); + Tensor<float, 3, RowMajor> output_backward(output_cols, output_rows, + output_depth); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + + input_backward = SpatialConvolutionBackwardInput(kernel, output_backward, + input_rows, input_cols, 1); + + EXPECT_EQ(input_backward.dimension(0), input_cols); + EXPECT_EQ(input_backward.dimension(1), input_rows); + EXPECT_EQ(input_backward.dimension(2), input_depth); + + for (int id = 0; id < input_depth; ++id) { + for (int i = 0; i < input_rows; ++i) { + for (int j = 0; j < input_cols; ++j) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int od = 0; od < output_depth; ++od) { + int output_i = i - r + (patch_rows - 1) / 2; + int output_j = j - c + (patch_cols - 1) / 2; + if (output_i >= 0 && output_i < output_rows && output_j >= 0 && + output_j < output_cols) { + expected += output_backward(output_j, output_i, od) * + kernel(c, r, id, od); + } + } + } + } + EigenApprox(input_backward(j, i, id), expected); + } + } + } +} + 
+TEST(EigenBackwardSpatialConvolutionsTest, + test_simple_cuboid_convolution_backward_input_same) { + const int input_depth = 2; + const int input_planes = 5; + const int input_rows = 3; + const int input_cols = 4; + const int patch_rows = 3; + const int patch_cols = 2; + const int patch_planes = 4; + const int output_rows = input_rows; + const int output_cols = input_cols; + const int output_planes = input_planes; + const int output_depth = 5; + + Tensor<float, 4> input_backward(input_depth, input_planes, input_rows, + input_cols); + Tensor<float, 5> kernel(output_depth, input_depth, patch_planes, patch_rows, + patch_cols); + Tensor<float, 4> output_backward(output_depth, output_planes, output_rows, + output_cols); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + input_backward.setRandom(); + + input_backward = CuboidConvolutionBackwardInput( + kernel, output_backward, input_planes, input_rows, input_cols); + + EXPECT_EQ(input_backward.dimension(3), input_cols); + EXPECT_EQ(input_backward.dimension(2), input_rows); + EXPECT_EQ(input_backward.dimension(1), input_planes); + EXPECT_EQ(input_backward.dimension(0), input_depth); + + const int dz = patch_planes - 1; + const int dy = patch_rows - 1; + const int dx = patch_cols - 1; + + const int forward_pad_x = dx - dx / 2; + const int forward_pad_y = dy - dy / 2; + const int forward_pad_z = dz - dz / 2; + + for (int id = 0; id < input_depth; ++id) { + for (int i = 0; i < input_planes; ++i) { + for (int j = 0; j < input_rows; ++j) { + for (int k = 0; k < input_cols; ++k) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int p = 0; p < patch_planes; ++p) { + for (int od = 0; od < output_depth; ++od) { + int output_i = i - p + forward_pad_z; + int output_j = j - r + forward_pad_y; + int output_k = k - c + forward_pad_x; + if (output_i >= 0 && output_i < output_planes && + 
output_j >= 0 && output_j < output_rows && + output_k >= 0 && output_k < output_cols) { + expected += + output_backward(od, output_i, output_j, output_k) * + kernel(od, id, p, r, c); + } + } + } + } + } + EigenApprox(input_backward(id, i, j, k), expected); + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_simple_cuboid_convolution_backward_input_same_row_major) { + const int input_depth = 2; + const int input_planes = 5; + const int input_rows = 3; + const int input_cols = 4; + const int patch_rows = 2; + const int patch_cols = 3; + const int patch_planes = 4; + const int output_rows = input_rows; + const int output_cols = input_cols; + const int output_planes = input_planes; + const int output_depth = 5; + + Tensor<float, 4, RowMajor> input_backward(input_cols, input_rows, + input_planes, input_depth); + Tensor<float, 5, RowMajor> kernel(patch_cols, patch_rows, patch_planes, + input_depth, output_depth); + Tensor<float, 4, RowMajor> output_backward(output_cols, output_rows, + output_planes, output_depth); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + input_backward.setRandom(); + + input_backward = CuboidConvolutionBackwardInput( + kernel, output_backward, input_planes, input_rows, input_cols); + + EXPECT_EQ(input_backward.dimension(0), input_cols); + EXPECT_EQ(input_backward.dimension(1), input_rows); + EXPECT_EQ(input_backward.dimension(2), input_planes); + EXPECT_EQ(input_backward.dimension(3), input_depth); + + const int dz = patch_planes - 1; + const int dy = patch_rows - 1; + const int dx = patch_cols - 1; + + const int forward_pad_x = dx - dx / 2; + const int forward_pad_y = dy - dy / 2; + const int forward_pad_z = dz - dz / 2; + + for (int id = 0; id < input_depth; ++id) { + for (int i = 0; i < input_planes; ++i) { + for (int j = 0; j < input_rows; ++j) { + for (int k = 0; k < input_cols; ++k) { + float expected = 0.0f; + for (int c = 0; c < 
patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int p = 0; p < patch_planes; ++p) { + for (int od = 0; od < output_depth; ++od) { + int output_i = i - p + forward_pad_z; + int output_j = j - r + forward_pad_y; + int output_k = k - c + forward_pad_x; + if (output_i >= 0 && output_i < output_planes && + output_j >= 0 && output_j < output_rows && + output_k >= 0 && output_k < output_cols) { + expected += + output_backward(output_k, output_j, output_i, od) * + kernel(c, r, p, id, od); + } + } + } + } + } + EigenApprox(input_backward(k, j, i, id), expected); + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_spatial_convolution_backward_input_valid) { + const int num_batches = 13; + const int input_depth = 2; + const int input_rows = 7; + const int input_cols = 9; + const int output_depth = 3; + const int patch_rows = 5; + const int patch_cols = 5; + const int output_rows = input_rows - patch_rows + 1; + const int output_cols = input_cols - patch_cols + 1; + + Tensor<float, 4> input_backward(input_depth, input_rows, input_cols, + num_batches); + Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols); + Tensor<float, 4> output_backward(output_depth, output_rows, output_cols, + num_batches); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + input_backward.setRandom(); + + input_backward = SpatialConvolutionBackwardInput(kernel, output_backward, + input_rows, input_cols, 1); + + EXPECT_EQ(input_backward.dimension(0), input_depth); + EXPECT_EQ(input_backward.dimension(1), input_rows); + EXPECT_EQ(input_backward.dimension(2), input_cols); + EXPECT_EQ(input_backward.dimension(3), num_batches); + + for (int b = 0; b < num_batches; ++b) { + for (int id = 0; id < input_depth; ++id) { + for (int i = 0; i < input_rows; ++i) { + for (int j = 0; j < input_cols; ++j) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) 
{ + for (int r = 0; r < patch_rows; ++r) { + for (int od = 0; od < output_depth; ++od) { + int output_i = i - r; + int output_j = j - c; + if (output_i >= 0 && output_i < output_rows && output_j >= 0 && + output_j < output_cols) { + expected += output_backward(od, output_i, output_j, b) * + kernel(od, id, r, c); + } + } + } + } + EigenApprox(input_backward(id, i, j, b), expected); + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_spatial_convolution_backward_input_valid_row_major) { + const int num_batches = 13; + const int input_depth = 2; + const int input_rows = 7; + const int input_cols = 9; + const int output_depth = 3; + const int patch_rows = 5; + const int patch_cols = 5; + const int output_rows = input_rows - patch_rows + 1; + const int output_cols = input_cols - patch_cols + 1; + + Tensor<float, 4, RowMajor> input_backward(num_batches, input_cols, input_rows, + input_depth); + Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth, + output_depth); + Tensor<float, 4, RowMajor> output_backward(num_batches, output_cols, + output_rows, output_depth); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + input_backward.setRandom(); + + input_backward = SpatialConvolutionBackwardInput(kernel, output_backward, + input_rows, input_cols, 1); + + EXPECT_EQ(input_backward.dimension(0), num_batches); + EXPECT_EQ(input_backward.dimension(1), input_cols); + EXPECT_EQ(input_backward.dimension(2), input_rows); + EXPECT_EQ(input_backward.dimension(3), input_depth); + + for (int b = 0; b < num_batches; ++b) { + for (int id = 0; id < input_depth; ++id) { + for (int i = 0; i < input_rows; ++i) { + for (int j = 0; j < input_cols; ++j) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int od = 0; od < output_depth; ++od) { + int output_i = i - r; + int output_j = j - c; + if (output_i 
>= 0 && output_i < output_rows && output_j >= 0 && + output_j < output_cols) { + expected += output_backward(b, output_j, output_i, od) * + kernel(c, r, id, od); + } + } + } + } + EigenApprox(input_backward(b, j, i, id), expected); + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_cuboid_convolution_backward_input_valid) { + const int num_batches = 13; + const int input_depth = 2; + const int input_planes = 5; + const int input_rows = 3; + const int input_cols = 4; + const int patch_rows = 2; + const int patch_cols = 2; + const int patch_planes = 2; + const int output_rows = input_rows - patch_rows + 1; + const int output_cols = input_cols - patch_cols + 1; + const int output_planes = input_planes - patch_planes + 1; + const int output_depth = 5; + + Tensor<float, 5> input_backward(input_depth, input_planes, input_rows, + input_cols, num_batches); + Tensor<float, 5> kernel(output_depth, input_depth, patch_planes, patch_rows, + patch_cols); + Tensor<float, 5> output_backward(output_depth, output_planes, output_rows, + output_cols, num_batches); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + input_backward.setRandom(); + + input_backward = CuboidConvolutionBackwardInput( + kernel, output_backward, input_planes, input_rows, input_cols); + + EXPECT_EQ(input_backward.dimension(4), num_batches); + EXPECT_EQ(input_backward.dimension(3), input_cols); + EXPECT_EQ(input_backward.dimension(2), input_rows); + EXPECT_EQ(input_backward.dimension(1), input_planes); + EXPECT_EQ(input_backward.dimension(0), input_depth); + + for (int b = 0; b < num_batches; ++b) { + for (int id = 0; id < input_depth; ++id) { + for (int i = 0; i < input_planes; ++i) { + for (int j = 0; j < input_rows; ++j) { + for (int k = 0; k < input_cols; ++k) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int p = 0; p < 
patch_planes; ++p) { + for (int od = 0; od < output_depth; ++od) { + int output_i = i - p; + int output_j = j - r; + int output_k = k - c; + if (output_i >= 0 && output_i < output_planes && + output_j >= 0 && output_j < output_rows && + output_k >= 0 && output_k < output_cols) { + expected += + output_backward(od, output_i, output_j, output_k, b) * + kernel(od, id, p, r, c); + } + } + } + } + } + EigenApprox(input_backward(id, i, j, k, b), expected); + } + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_cuboid_convolution_backward_input_valid_row_major) { + const int num_batches = 13; + const int input_depth = 2; + const int input_planes = 5; + const int input_rows = 3; + const int input_cols = 4; + const int patch_rows = 2; + const int patch_cols = 2; + const int patch_planes = 2; + const int output_rows = input_rows - patch_rows + 1; + const int output_cols = input_cols - patch_cols + 1; + const int output_planes = input_planes - patch_planes + 1; + const int output_depth = 5; + + Tensor<float, 5, RowMajor> input_backward(num_batches, input_cols, input_rows, + input_planes, input_depth); + Tensor<float, 5, RowMajor> kernel(patch_cols, patch_rows, patch_planes, + input_depth, output_depth); + Tensor<float, 5, RowMajor> output_backward( + num_batches, output_cols, output_rows, output_planes, output_depth); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + input_backward.setRandom(); + + input_backward = CuboidConvolutionBackwardInput( + kernel, output_backward, input_planes, input_rows, input_cols); + + EXPECT_EQ(input_backward.dimension(0), num_batches); + EXPECT_EQ(input_backward.dimension(1), input_cols); + EXPECT_EQ(input_backward.dimension(2), input_rows); + EXPECT_EQ(input_backward.dimension(3), input_planes); + EXPECT_EQ(input_backward.dimension(4), input_depth); + + for (int b = 0; b < num_batches; ++b) { + for (int id = 0; id < input_depth; 
++id) { + for (int i = 0; i < input_planes; ++i) { + for (int j = 0; j < input_rows; ++j) { + for (int k = 0; k < input_cols; ++k) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int p = 0; p < patch_planes; ++p) { + for (int od = 0; od < output_depth; ++od) { + int output_i = i - p; + int output_j = j - r; + int output_k = k - c; + if (output_i >= 0 && output_i < output_planes && + output_j >= 0 && output_j < output_rows && + output_k >= 0 && output_k < output_cols) { + expected += + output_backward(b, output_k, output_j, output_i, od) * + kernel(c, r, p, id, od); + } + } + } + } + } + EigenApprox(input_backward(b, k, j, i, id), expected); + } + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_strided_spatial_convolution_backward_input_valid) { + const int num_batches = 11; + const int input_depth = 2; + const int input_rows = 9; + const int input_cols = 13; + const int output_depth = 5; + const int patch_rows = 3; + const int patch_cols = 3; + + const int stride = 3; + + const int output_rows = (input_rows - patch_rows + 1 + stride - 1) / stride; + const int output_cols = (input_cols - patch_cols + 1 + stride - 1) / stride; + + Tensor<float, 4> input_backward(input_depth, input_rows, input_cols, + num_batches); + Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols); + Tensor<float, 4> output_backward(output_depth, output_rows, output_cols, + num_batches); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + input_backward.setRandom(); + + input_backward = SpatialConvolutionBackwardInput( + kernel, output_backward, input_rows, input_cols, stride); + + EXPECT_EQ(input_backward.dimension(0), input_depth); + EXPECT_EQ(input_backward.dimension(1), input_rows); + EXPECT_EQ(input_backward.dimension(2), input_cols); + EXPECT_EQ(input_backward.dimension(3), num_batches); + 
+ for (int b = 0; b < num_batches; ++b) { + for (int id = 0; id < input_depth; ++id) { + for (int i = 0; i < input_rows; ++i) { + for (int j = 0; j < input_cols; ++j) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int od = 0; od < output_depth; ++od) { + int output_i = i - r; + int output_j = j - c; + if (output_i >= 0 && output_i / stride < output_rows && + output_j >= 0 && output_j / stride < output_cols && + output_i % stride == 0 && output_j % stride == 0) { + expected += output_backward(od, output_i / stride, + output_j / stride, b) * + kernel(od, id, r, c); + } + } + } + } + EigenApprox(input_backward(id, i, j, b), expected); + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_strided_spatial_convolution_backward_input_valid_row_major) { + const int num_batches = 11; + const int input_depth = 3; + const int input_rows = 5; + const int input_cols = 9; + const int output_depth = 1; + const int patch_rows = 3; + const int patch_cols = 3; + + const int stride = 2; + + const int output_rows = (input_rows - patch_rows + 2) / stride; + const int output_cols = (input_cols - patch_cols + 2) / stride; + + Tensor<float, 4, RowMajor> input_backward(num_batches, input_cols, input_rows, + input_depth); + Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth, + output_depth); + Tensor<float, 4, RowMajor> output_backward(num_batches, output_cols, + output_rows, output_depth); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + input_backward.setRandom(); + + input_backward = SpatialConvolutionBackwardInput( + kernel, output_backward, input_rows, input_cols, stride); + + EXPECT_EQ(input_backward.dimension(0), num_batches); + EXPECT_EQ(input_backward.dimension(1), input_cols); + EXPECT_EQ(input_backward.dimension(2), input_rows); + EXPECT_EQ(input_backward.dimension(3), input_depth); 
+ + for (int b = 0; b < num_batches; ++b) { + for (int id = 0; id < input_depth; ++id) { + for (int i = 0; i < input_rows; ++i) { + for (int j = 0; j < input_cols; ++j) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int od = 0; od < output_depth; ++od) { + int output_i = i - r; + int output_j = j - c; + if (output_i >= 0 && output_i / stride < output_rows && + output_j >= 0 && output_j / stride < output_cols && + output_i % stride == 0 && output_j % stride == 0) { + expected += output_backward(b, output_j / stride, + output_i / stride, od) * + kernel(c, r, id, od); + } + } + } + } + EigenApprox(input_backward(b, j, i, id), expected); + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_simple_spatial_convolution_backward_kernel_valid) { + const int input_depth = 2; + const int input_rows = 3; + const int input_cols = 4; + const int output_depth = 5; + const int patch_rows = 2; + const int patch_cols = 2; + const int output_rows = input_rows - patch_rows + 1; + const int output_cols = input_cols - patch_cols + 1; + + Tensor<float, 3> input(input_depth, input_rows, input_cols); + Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols); + Tensor<float, 3> output_backward(output_depth, output_rows, output_cols); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + input = input.constant(2.0f) + input.random(); + kernel.setRandom(); + + kernel = SpatialConvolutionBackwardKernel(input, output_backward, patch_rows, + patch_cols, 1); + + EXPECT_EQ(kernel.dimension(0), output_depth); + EXPECT_EQ(kernel.dimension(1), input_depth); + EXPECT_EQ(kernel.dimension(2), patch_rows); + EXPECT_EQ(kernel.dimension(3), patch_cols); + + for (int od = 0; od < output_depth; ++od) { + for (int id = 0; id < input_depth; ++id) { + for (int r = 0; r < patch_rows; ++r) { + for (int c = 0; c < patch_cols; ++c) { + float expected = 0.0f; + for (int i = 0; i < 
input_rows; ++i) { + for (int j = 0; j < input_cols; ++j) { + int output_i = i - r; + int output_j = j - c; + if (output_i >= 0 && output_i < output_rows && output_j >= 0 && + output_j < output_cols) { + expected += + input(id, i, j) * output_backward(od, output_i, output_j); + } + } + } + EigenApprox(kernel(od, id, r, c), expected); + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_simple_spatial_convolution_backward_kernel_valid_row_major) { + const int input_depth = 2; + const int input_rows = 3; + const int input_cols = 4; + const int output_depth = 5; + const int patch_rows = 2; + const int patch_cols = 2; + const int output_rows = input_rows - patch_rows + 1; + const int output_cols = input_cols - patch_cols + 1; + + Tensor<float, 3, RowMajor> input(input_cols, input_rows, input_depth); + Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth, + output_depth); + Tensor<float, 3, RowMajor> output_backward(output_cols, output_rows, + output_depth); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + input = input.constant(2.0f) + input.random(); + kernel.setRandom(); + + kernel = SpatialConvolutionBackwardKernel(input, output_backward, patch_rows, + patch_cols, 1); + + EXPECT_EQ(kernel.dimension(0), patch_cols); + EXPECT_EQ(kernel.dimension(1), patch_rows); + EXPECT_EQ(kernel.dimension(2), input_depth); + EXPECT_EQ(kernel.dimension(3), output_depth); + + for (int od = 0; od < output_depth; ++od) { + for (int id = 0; id < input_depth; ++id) { + for (int r = 0; r < patch_rows; ++r) { + for (int c = 0; c < patch_cols; ++c) { + float expected = 0.0f; + for (int i = 0; i < input_rows; ++i) { + for (int j = 0; j < input_cols; ++j) { + int output_i = i - r; + int output_j = j - c; + if (output_i >= 0 && output_i < output_rows && output_j >= 0 && + output_j < output_cols) { + expected += + input(j, i, id) * output_backward(output_j, output_i, od); + } + } + } + EigenApprox(kernel(c, r, id, od), 
expected); + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_atrous_spatial_convolution_backward_input_valid) { + const int num_batches = 11; + const int patch_rows = 3; + const int patch_cols = 3; + + const int input_depth = 2; + const int input_rows = 9; + const int input_cols = 13; + + const int in_stride = 3; + const int patch_rows_eff = patch_rows + (patch_rows - 1) * (in_stride - 1); + const int patch_cols_eff = patch_cols + (patch_cols - 1) * (in_stride - 1); + + const int output_depth = 5; + const int output_rows = input_rows - patch_rows_eff + 1; + const int output_cols = input_cols - patch_cols_eff + 1; + + Tensor<float, 4> output_backward(output_depth, output_rows, output_cols, + num_batches); + output_backward.setRandom(); + Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols); + kernel.setRandom(); + + const array<DenseIndex, 4> kernel_strides({1, 1, in_stride, in_stride}); + const Tensor<float, 4> kernel_eff = kernel.inflate(kernel_strides); + + const Tensor<float, 4> input_backward = SpatialConvolutionBackwardInput( + kernel, output_backward, input_rows, input_cols, 1, in_stride); + const Tensor<float, 4> expected_input_backward = + SpatialConvolutionBackwardInput(kernel_eff, output_backward, input_rows, + input_cols); + + EXPECT_EQ(input_backward.dimension(0), input_depth); + EXPECT_EQ(input_backward.dimension(1), input_rows); + EXPECT_EQ(input_backward.dimension(2), input_cols); + EXPECT_EQ(input_backward.dimension(3), num_batches); + + eigen_assert(dimensions_match(input_backward.dimensions(), + expected_input_backward.dimensions())); + for (size_t i = 0; i < input_backward.dimensions().TotalSize(); ++i) { + EigenApprox(input_backward.data()[i], expected_input_backward.data()[i]); + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_atrous_spatial_convolution_backward_input_valid_row_major) { + const int num_batches = 11; + const int patch_rows = 3; + const int patch_cols = 3; + + 
const int input_depth = 2; + const int input_rows = 9; + const int input_cols = 13; + + const int in_stride = 3; + const int patch_rows_eff = patch_rows + (patch_rows - 1) * (in_stride - 1); + const int patch_cols_eff = patch_cols + (patch_cols - 1) * (in_stride - 1); + + const int output_depth = 5; + const int output_rows = input_rows - patch_rows_eff + 1; + const int output_cols = input_cols - patch_cols_eff + 1; + + Tensor<float, 4, RowMajor> output_backward(num_batches, output_cols, + output_rows, output_depth); + output_backward.setRandom(); + + Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth, + output_depth); + kernel.setRandom(); + + const array<DenseIndex, 4> kernel_strides({in_stride, in_stride, 1, 1}); + const Tensor<float, 4, RowMajor> kernel_eff = kernel.inflate(kernel_strides); + + const Tensor<float, 4, RowMajor> input_backward = + SpatialConvolutionBackwardInput(kernel, output_backward, input_rows, + input_cols, 1, in_stride); + const Tensor<float, 4, RowMajor> expected_input_backward = + SpatialConvolutionBackwardInput(kernel_eff, output_backward, input_rows, + input_cols); + + EXPECT_EQ(input_backward.dimension(0), num_batches); + EXPECT_EQ(input_backward.dimension(1), input_cols); + EXPECT_EQ(input_backward.dimension(2), input_rows); + EXPECT_EQ(input_backward.dimension(3), input_depth); + + eigen_assert(dimensions_match(input_backward.dimensions(), + expected_input_backward.dimensions())); + for (size_t i = 0; i < input_backward.dimensions().TotalSize(); ++i) { + EigenApprox(input_backward.data()[i], expected_input_backward.data()[i]); + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_atrous_spatial_convolution_backward_kernel_valid) { + const int num_batches = 11; + const int patch_rows = 3; + const int patch_cols = 3; + + const int input_depth = 2; + const int input_rows = 9; + const int input_cols = 13; + + const int in_stride = 3; + const int patch_rows_eff = patch_rows + (patch_rows - 1) * (in_stride - 
1); + const int patch_cols_eff = patch_cols + (patch_cols - 1) * (in_stride - 1); + + const int output_depth = 5; + const int output_rows = input_rows - patch_rows_eff + 1; + const int output_cols = input_cols - patch_cols_eff + 1; + + Tensor<float, 4> output_backward(output_depth, output_rows, output_cols, + num_batches); + output_backward.setRandom(); + + Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches); + input.setRandom(); + + const array<DenseIndex, 4> kernel_strides({1, 1, in_stride, in_stride}); + + const Tensor<float, 4> kernel_backward = SpatialConvolutionBackwardKernel( + input, output_backward, patch_rows, patch_cols, 1, in_stride); + const Tensor<float, 4> expected_kernel_backward = + SpatialConvolutionBackwardKernel(input, output_backward, patch_rows_eff, + patch_cols_eff) + .stride(kernel_strides); + + EXPECT_EQ(kernel_backward.dimension(0), output_depth); + EXPECT_EQ(kernel_backward.dimension(1), input_depth); + EXPECT_EQ(kernel_backward.dimension(2), patch_rows); + EXPECT_EQ(kernel_backward.dimension(3), patch_cols); + + eigen_assert(dimensions_match(kernel_backward.dimensions(), + expected_kernel_backward.dimensions())); + for (size_t i = 0; i < kernel_backward.dimensions().TotalSize(); ++i) { + EigenApprox(kernel_backward.data()[i], expected_kernel_backward.data()[i]); + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_atrous_spatial_convolution_backward_kernel_valid_row_major) { + const int num_batches = 11; + const int patch_rows = 3; + const int patch_cols = 3; + + const int input_depth = 2; + const int input_rows = 9; + const int input_cols = 13; + + const int in_stride = 3; + const int patch_rows_eff = patch_rows + (patch_rows - 1) * (in_stride - 1); + const int patch_cols_eff = patch_cols + (patch_cols - 1) * (in_stride - 1); + + const int output_depth = 5; + const int output_rows = input_rows - patch_rows_eff + 1; + const int output_cols = input_cols - patch_cols_eff + 1; + + Tensor<float, 4, 
RowMajor> output_backward(num_batches, output_cols, + output_rows, output_depth); + output_backward.setRandom(); + + Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows, + input_depth); + input.setRandom(); + + const array<DenseIndex, 4> kernel_strides({in_stride, in_stride, 1, 1}); + + const Tensor<float, 4, RowMajor> kernel_backward = + SpatialConvolutionBackwardKernel(input, output_backward, patch_rows, + patch_cols, 1, in_stride); + const Tensor<float, 4, RowMajor> expected_kernel_backward = + SpatialConvolutionBackwardKernel(input, output_backward, patch_rows_eff, + patch_cols_eff) + .stride(kernel_strides); + + EXPECT_EQ(kernel_backward.dimension(0), patch_cols); + EXPECT_EQ(kernel_backward.dimension(1), patch_rows); + EXPECT_EQ(kernel_backward.dimension(2), input_depth); + EXPECT_EQ(kernel_backward.dimension(3), output_depth); + + eigen_assert(dimensions_match(kernel_backward.dimensions(), + expected_kernel_backward.dimensions())); + for (size_t i = 0; i < kernel_backward.dimensions().TotalSize(); ++i) { + EigenApprox(kernel_backward.data()[i], expected_kernel_backward.data()[i]); + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_simple_cuboid_convolution_backward_kernel_valid) { + const int input_depth = 2; + const int input_planes = 5; + const int input_rows = 3; + const int input_cols = 4; + const int output_depth = 5; + const int patch_rows = 2; + const int patch_cols = 2; + const int patch_planes = 3; + const int output_rows = input_rows - patch_rows + 1; + const int output_cols = input_cols - patch_cols + 1; + const int output_planes = input_planes - patch_planes + 1; + + Tensor<float, 4> input(input_depth, input_planes, input_rows, input_cols); + Tensor<float, 5> kernel(output_depth, input_depth, patch_planes, patch_rows, + patch_cols); + Tensor<float, 4> output_backward(output_depth, output_planes, output_rows, + output_cols); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + input = 
input.constant(2.0f) + input.random(); + kernel.setRandom(); + + kernel = CuboidConvolutionBackwardKernel(input, output_backward, patch_planes, + patch_rows, patch_cols, 1, 1, 1); + + EXPECT_EQ(kernel.dimension(0), output_depth); + EXPECT_EQ(kernel.dimension(1), input_depth); + EXPECT_EQ(kernel.dimension(2), patch_planes); + EXPECT_EQ(kernel.dimension(3), patch_rows); + EXPECT_EQ(kernel.dimension(4), patch_cols); + + for (int od = 0; od < output_depth; ++od) { + for (int id = 0; id < input_depth; ++id) { + for (int p = 0; p < patch_planes; ++p) { + for (int r = 0; r < patch_rows; ++r) { + for (int c = 0; c < patch_cols; ++c) { + float expected = 0.0f; + for (int i = 0; i < input_planes; ++i) { + for (int j = 0; j < input_rows; ++j) { + for (int k = 0; k < input_cols; ++k) { + int output_j = j - r; + int output_k = k - c; + int output_i = i - p; + if (output_i >= 0 && output_i < output_planes && + output_j >= 0 && output_j < output_rows && + output_k >= 0 && output_k < output_cols) { + expected += + input(id, i, j, k) * + output_backward(od, output_i, output_j, output_k); + } + } + } + } + EigenApprox(kernel(od, id, p, r, c), expected); + } + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_simple_cuboid_convolution_backward_kernel_valid_row_major) { + const int input_depth = 2; + const int input_planes = 5; + const int input_rows = 3; + const int input_cols = 4; + const int output_depth = 5; + const int patch_rows = 2; + const int patch_cols = 2; + const int patch_planes = 3; + const int output_rows = input_rows - patch_rows + 1; + const int output_cols = input_cols - patch_cols + 1; + const int output_planes = input_planes - patch_planes + 1; + + Tensor<float, 4, RowMajor> input(input_cols, input_rows, input_planes, + input_depth); + Tensor<float, 5, RowMajor> kernel(patch_cols, patch_rows, patch_planes, + input_depth, output_depth); + Tensor<float, 4, RowMajor> output_backward(output_cols, output_rows, + output_planes, output_depth); + + 
output_backward = output_backward.constant(11.0f) + output_backward.random(); + input = input.constant(2.0f) + input.random(); + kernel.setRandom(); + + kernel = CuboidConvolutionBackwardKernel(input, output_backward, patch_planes, + patch_rows, patch_cols, 1, 1, 1); + + EXPECT_EQ(kernel.dimension(4), output_depth); + EXPECT_EQ(kernel.dimension(3), input_depth); + EXPECT_EQ(kernel.dimension(2), patch_planes); + EXPECT_EQ(kernel.dimension(1), patch_rows); + EXPECT_EQ(kernel.dimension(0), patch_cols); + + for (int od = 0; od < output_depth; ++od) { + for (int id = 0; id < input_depth; ++id) { + for (int p = 0; p < patch_planes; ++p) { + for (int r = 0; r < patch_rows; ++r) { + for (int c = 0; c < patch_cols; ++c) { + float expected = 0.0f; + for (int i = 0; i < input_planes; ++i) { + for (int j = 0; j < input_rows; ++j) { + for (int k = 0; k < input_cols; ++k) { + int output_j = j - r; + int output_k = k - c; + int output_i = i - p; + if (output_i >= 0 && output_i < output_planes && + output_j >= 0 && output_j < output_rows && + output_k >= 0 && output_k < output_cols) { + expected += + input(k, j, i, id) * + output_backward(output_k, output_j, output_i, od); + } + } + } + } + EigenApprox(kernel(c, r, p, id, od), expected); + } + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_spatial_convolution_backward_kernel_valid) { + const int num_batches = 13; + const int input_depth = 2; + const int input_rows = 7; + const int input_cols = 9; + const int output_depth = 3; + const int patch_rows = 5; + const int patch_cols = 5; + const int output_rows = input_rows - patch_rows + 1; + const int output_cols = input_cols - patch_cols + 1; + + Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches); + Tensor<float, 4> kernel_backward(output_depth, input_depth, patch_rows, + patch_cols); + Tensor<float, 4> output_backward(output_depth, output_rows, output_cols, + num_batches); + + output_backward = output_backward.constant(11.0f) + 
output_backward.random(); + input = input.constant(2.0f) + input.random(); + kernel_backward.setRandom(); + + kernel_backward = SpatialConvolutionBackwardKernel(input, output_backward, + patch_rows, patch_cols, 1); + + EXPECT_EQ(kernel_backward.dimension(0), output_depth); + EXPECT_EQ(kernel_backward.dimension(1), input_depth); + EXPECT_EQ(kernel_backward.dimension(2), patch_rows); + EXPECT_EQ(kernel_backward.dimension(3), patch_cols); + + for (int od = 0; od < output_depth; ++od) { + for (int id = 0; id < input_depth; ++id) { + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + float expected = 0.0f; + for (int b = 0; b < num_batches; ++b) { + for (int i = 0; i < input_rows; ++i) { + for (int j = 0; j < input_cols; ++j) { + int output_i = i - r; + int output_j = j - c; + if (output_i >= 0 && output_i < output_rows && output_j >= 0 && + output_j < output_cols) { + expected += input(id, i, j, b) * + output_backward(od, output_i, output_j, b); + } + } + } + } + EigenApprox(kernel_backward(od, id, r, c), expected); + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_spatial_convolution_backward_kernel_valid_row_major) { + const int num_batches = 13; + const int input_depth = 2; + const int input_rows = 7; + const int input_cols = 9; + const int output_depth = 3; + const int patch_rows = 4; + const int patch_cols = 4; + const int output_rows = input_rows - patch_rows + 1; + const int output_cols = input_cols - patch_cols + 1; + + Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows, + input_depth); + Tensor<float, 4, RowMajor> kernel_backward(patch_cols, patch_rows, + input_depth, output_depth); + Tensor<float, 4, RowMajor> output_backward(num_batches, output_cols, + output_rows, output_depth); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + input = input.constant(2.0f) + input.random(); + kernel_backward.setRandom(); + + kernel_backward = 
SpatialConvolutionBackwardKernel(input, output_backward, + patch_rows, patch_cols, 1); + + EXPECT_EQ(kernel_backward.dimension(0), patch_cols); + EXPECT_EQ(kernel_backward.dimension(1), patch_rows); + EXPECT_EQ(kernel_backward.dimension(2), input_depth); + EXPECT_EQ(kernel_backward.dimension(3), output_depth); + + for (int od = 0; od < output_depth; ++od) { + for (int id = 0; id < input_depth; ++id) { + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + float expected = 0.0f; + for (int b = 0; b < num_batches; ++b) { + for (int i = 0; i < input_rows; ++i) { + for (int j = 0; j < input_cols; ++j) { + int output_i = i - r; + int output_j = j - c; + if (output_i >= 0 && output_i < output_rows && output_j >= 0 && + output_j < output_cols) { + expected += input(b, j, i, id) * + output_backward(b, output_j, output_i, od); + } + } + } + } + EigenApprox(kernel_backward(c, r, id, od), expected); + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_cuboid_convolution_backward_kernel_valid) { + const int num_batches = 13; + const int input_depth = 2; + const int input_planes = 5; + const int input_rows = 7; + const int input_cols = 9; + const int output_depth = 3; + const int patch_rows = 5; + const int patch_cols = 5; + const int patch_planes = 3; + const int output_rows = input_rows - patch_rows + 1; + const int output_cols = input_cols - patch_cols + 1; + const int output_planes = input_planes - patch_planes + 1; + + Tensor<float, 5> input(input_depth, input_planes, input_rows, input_cols, + num_batches); + Tensor<float, 5> kernel_backward(output_depth, input_depth, patch_planes, + patch_rows, patch_cols); + Tensor<float, 5> output_backward(output_depth, output_planes, output_rows, + output_cols, num_batches); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + input = input.constant(2.0f) + input.random(); + kernel_backward.setRandom(); + + kernel_backward = 
CuboidConvolutionBackwardKernel( + input, output_backward, patch_planes, patch_rows, patch_cols, 1, 1, 1); + + EXPECT_EQ(kernel_backward.dimension(0), output_depth); + EXPECT_EQ(kernel_backward.dimension(1), input_depth); + EXPECT_EQ(kernel_backward.dimension(2), patch_planes); + EXPECT_EQ(kernel_backward.dimension(3), patch_rows); + EXPECT_EQ(kernel_backward.dimension(4), patch_cols); + + for (int od = 0; od < output_depth; ++od) { + for (int id = 0; id < input_depth; ++id) { + for (int p = 0; p < patch_planes; ++p) { + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + float expected = 0.0f; + for (int b = 0; b < num_batches; ++b) { + for (int i = 0; i < input_planes; ++i) { + for (int j = 0; j < input_rows; ++j) { + for (int k = 0; k < input_cols; ++k) { + int output_j = j - r; + int output_k = k - c; + int output_i = i - p; + if (output_i >= 0 && output_i < output_planes && + output_j >= 0 && output_j < output_rows && + output_k >= 0 && output_k < output_cols) { + expected += + input(id, i, j, k, b) * + output_backward(od, output_i, output_j, output_k, b); + } + } + } + } + } + EigenApprox(kernel_backward(od, id, p, r, c), expected); + } + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_cuboid_convolution_backward_kernel_valid_row_major) { + const int num_batches = 13; + const int input_depth = 2; + const int input_planes = 5; + const int input_rows = 7; + const int input_cols = 9; + const int output_depth = 3; + const int patch_rows = 5; + const int patch_cols = 5; + const int patch_planes = 3; + const int output_rows = input_rows - patch_rows + 1; + const int output_cols = input_cols - patch_cols + 1; + const int output_planes = input_planes - patch_planes + 1; + + Tensor<float, 5, RowMajor> input(num_batches, input_cols, input_rows, + input_planes, input_depth); + Tensor<float, 5, RowMajor> kernel_backward( + patch_cols, patch_rows, patch_planes, input_depth, output_depth); + Tensor<float, 5, 
RowMajor> output_backward( + num_batches, output_cols, output_rows, output_planes, output_depth); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + input = input.constant(2.0f) + input.random(); + kernel_backward.setRandom(); + + kernel_backward = CuboidConvolutionBackwardKernel( + input, output_backward, patch_planes, patch_rows, patch_cols, 1, 1, 1); + + EXPECT_EQ(kernel_backward.dimension(4), output_depth); + EXPECT_EQ(kernel_backward.dimension(3), input_depth); + EXPECT_EQ(kernel_backward.dimension(2), patch_planes); + EXPECT_EQ(kernel_backward.dimension(1), patch_rows); + EXPECT_EQ(kernel_backward.dimension(0), patch_cols); + + for (int od = 0; od < output_depth; ++od) { + for (int id = 0; id < input_depth; ++id) { + for (int p = 0; p < patch_planes; ++p) { + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + float expected = 0.0f; + for (int b = 0; b < num_batches; ++b) { + for (int i = 0; i < input_planes; ++i) { + for (int j = 0; j < input_rows; ++j) { + for (int k = 0; k < input_cols; ++k) { + int output_j = j - r; + int output_k = k - c; + int output_i = i - p; + if (output_i >= 0 && output_i < output_planes && + output_j >= 0 && output_j < output_rows && + output_k >= 0 && output_k < output_cols) { + expected += + input(b, k, j, i, id) * + output_backward(b, output_k, output_j, output_i, od); + } + } + } + } + } + EigenApprox(kernel_backward(c, r, p, id, od), expected); + } + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_strided_spatial_convolution_backward_kernel_valid) { + const int num_batches = 13; + const int input_depth = 2; + const int input_rows = 7; + const int input_cols = 9; + const int output_depth = 3; + const int patch_rows = 5; + const int patch_cols = 5; + + const int stride = 2; + + const int output_rows = (input_rows - patch_rows + 1 + stride - 1) / stride; + const int output_cols = (input_cols - patch_cols + 1 + stride - 1) / stride; + + 
Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches); + Tensor<float, 4> kernel_backward(output_depth, input_depth, patch_rows, + patch_cols); + Tensor<float, 4> output_backward(output_depth, output_rows, output_cols, + num_batches); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + input = input.constant(2.0f) + input.random(); + kernel_backward.setRandom(); + + kernel_backward = SpatialConvolutionBackwardKernel( + input, output_backward, patch_rows, patch_cols, stride); + + EXPECT_EQ(kernel_backward.dimension(0), output_depth); + EXPECT_EQ(kernel_backward.dimension(1), input_depth); + EXPECT_EQ(kernel_backward.dimension(2), patch_rows); + EXPECT_EQ(kernel_backward.dimension(3), patch_cols); + + for (int od = 0; od < output_depth; ++od) { + for (int id = 0; id < input_depth; ++id) { + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + float expected = 0.0f; + for (int b = 0; b < num_batches; ++b) { + for (int i = 0; i < input_rows; ++i) { + for (int j = 0; j < input_cols; ++j) { + int output_i = i - r; + int output_j = j - c; + if (output_i >= 0 && output_i / stride < output_rows && + output_j >= 0 && output_j / stride < output_cols && + output_i % stride == 0 && output_j % stride == 0) { + expected += input(id, i, j, b) * + output_backward(od, output_i / stride, + output_j / stride, b); + } + } + } + } + EigenApprox(kernel_backward(od, id, r, c), expected); + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_strided_spatial_convolution_backward_kernel_valid_row_major) { + const int num_batches = 13; + const int input_depth = 2; + const int input_rows = 7; + const int input_cols = 9; + const int output_depth = 3; + const int patch_rows = 4; + const int patch_cols = 4; + + const int stride = 2; + + const int output_rows = (input_rows - patch_rows + 1 + stride - 1) / stride; + const int output_cols = (input_cols - patch_cols + 1 + stride - 1) / stride; + 
+ Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows, + input_depth); + Tensor<float, 4, RowMajor> kernel_backward(patch_cols, patch_rows, + input_depth, output_depth); + Tensor<float, 4, RowMajor> output_backward(num_batches, output_cols, + output_rows, output_depth); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + input = input.constant(2.0f) + input.random(); + kernel_backward.setRandom(); + + kernel_backward = SpatialConvolutionBackwardKernel( + input, output_backward, patch_rows, patch_cols, stride); + + EXPECT_EQ(kernel_backward.dimension(0), patch_cols); + EXPECT_EQ(kernel_backward.dimension(1), patch_rows); + EXPECT_EQ(kernel_backward.dimension(2), input_depth); + EXPECT_EQ(kernel_backward.dimension(3), output_depth); + + for (int od = 0; od < output_depth; ++od) { + for (int id = 0; id < input_depth; ++id) { + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + float expected = 0.0f; + for (int b = 0; b < num_batches; ++b) { + for (int i = 0; i < input_rows; ++i) { + for (int j = 0; j < input_cols; ++j) { + int output_i = i - r; + int output_j = j - c; + if (output_i >= 0 && output_i / stride < output_rows && + output_j >= 0 && output_j / stride < output_cols && + output_i % stride == 0 && output_j % stride == 0) { + expected += input(b, j, i, id) * + output_backward(b, output_j / stride, + output_i / stride, od); + } + } + } + } + EigenApprox(kernel_backward(c, r, id, od), expected); + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_strided_cuboid_convolution_backward_kernel_valid) { + const int num_batches = 13; + const int input_depth = 2; + const int input_planes = 8; + const int input_rows = 7; + const int input_cols = 9; + const int output_depth = 3; + const int patch_planes = 3; + const int patch_rows = 3; + const int patch_cols = 2; + + const int stride_planes = 2; + const int stride_cols = 3; + const int stride_rows = 1; + + const int 
output_rows = ceil_div(input_rows - patch_rows + 1, stride_rows); + const int output_cols = ceil_div(input_cols - patch_cols + 1, stride_cols); + const int output_planes = + ceil_div(input_planes - patch_planes + 1, stride_planes); + + Tensor<float, 5> input(input_depth, input_planes, input_rows, input_cols, + num_batches); + Tensor<float, 5> kernel_backward(output_depth, input_depth, patch_planes, + patch_rows, patch_cols); + Tensor<float, 5> output_backward(output_depth, output_planes, output_rows, + output_cols, num_batches); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + input = input.constant(2.0f) + input.random(); + kernel_backward.setRandom(); + + kernel_backward = CuboidConvolutionBackwardKernel( + input, output_backward, patch_planes, patch_rows, patch_cols, + stride_planes, stride_rows, stride_cols); + + EXPECT_EQ(kernel_backward.dimension(0), output_depth); + EXPECT_EQ(kernel_backward.dimension(1), input_depth); + EXPECT_EQ(kernel_backward.dimension(2), patch_planes); + EXPECT_EQ(kernel_backward.dimension(3), patch_rows); + EXPECT_EQ(kernel_backward.dimension(4), patch_cols); + + for (int od = 0; od < output_depth; ++od) { + for (int id = 0; id < input_depth; ++id) { + for (int p = 0; p < patch_planes; ++p) { + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + float expected = 0.0f; + for (int b = 0; b < num_batches; ++b) { + for (int i = 0; i < input_planes; ++i) { + for (int j = 0; j < input_rows; ++j) { + for (int k = 0; k < input_cols; ++k) { + int output_j = j - r; + int output_k = k - c; + int output_i = i - p; + if (output_i >= 0 && + output_i / stride_planes < output_planes && + output_j >= 0 && output_j / stride_rows < output_rows && + output_k >= 0 && output_k / stride_cols < output_cols && + output_i % stride_planes == 0 && + output_j % stride_rows == 0 && + output_k % stride_cols == 0) { + expected += input(id, i, j, k, b) * + output_backward(od, output_i / stride_planes, + 
output_j / stride_rows, + output_k / stride_cols, b); + } + } + } + } + } + EigenApprox(kernel_backward(od, id, p, r, c), expected); + } + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_strided_cuboid_convolution_backward_kernel_valid_row_major) { + const int num_batches = 13; + const int input_depth = 2; + const int input_planes = 8; + const int input_rows = 7; + const int input_cols = 9; + const int output_depth = 3; + const int patch_planes = 3; + const int patch_rows = 3; + const int patch_cols = 2; + + const int stride_planes = 2; + const int stride_cols = 3; + const int stride_rows = 1; + + const int output_rows = ceil_div(input_rows - patch_rows + 1, stride_rows); + const int output_cols = ceil_div(input_cols - patch_cols + 1, stride_cols); + const int output_planes = + ceil_div(input_planes - patch_planes + 1, stride_planes); + + Tensor<float, 5, RowMajor> input(num_batches, input_cols, input_rows, + input_planes, input_depth); + Tensor<float, 5, RowMajor> kernel_backward( + patch_cols, patch_rows, patch_planes, input_depth, output_depth); + Tensor<float, 5, RowMajor> output_backward( + num_batches, output_cols, output_rows, output_planes, output_depth); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + input = input.constant(2.0f) + input.random(); + kernel_backward.setRandom(); + + kernel_backward = CuboidConvolutionBackwardKernel( + input, output_backward, patch_planes, patch_rows, patch_cols, + stride_planes, stride_rows, stride_cols); + + EXPECT_EQ(kernel_backward.dimension(4), output_depth); + EXPECT_EQ(kernel_backward.dimension(3), input_depth); + EXPECT_EQ(kernel_backward.dimension(2), patch_planes); + EXPECT_EQ(kernel_backward.dimension(1), patch_rows); + EXPECT_EQ(kernel_backward.dimension(0), patch_cols); + + for (int od = 0; od < output_depth; ++od) { + for (int id = 0; id < input_depth; ++id) { + for (int p = 0; p < patch_planes; ++p) { + for (int c = 0; c < patch_cols; ++c) { + 
for (int r = 0; r < patch_rows; ++r) { + float expected = 0.0f; + for (int b = 0; b < num_batches; ++b) { + for (int i = 0; i < input_planes; ++i) { + for (int j = 0; j < input_rows; ++j) { + for (int k = 0; k < input_cols; ++k) { + int output_j = j - r; + int output_k = k - c; + int output_i = i - p; + if (output_i >= 0 && + output_i / stride_planes < output_planes && + output_j >= 0 && output_j / stride_rows < output_rows && + output_k >= 0 && output_k / stride_cols < output_cols && + output_i % stride_planes == 0 && + output_j % stride_rows == 0 && + output_k % stride_cols == 0) { + expected += input(b, k, j, i, id) * + output_backward(b, output_k / stride_cols, + output_j / stride_rows, + output_i / stride_planes, od); + } + } + } + } + } + EigenApprox(kernel_backward(c, r, p, id, od), expected); + } + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_strided_cuboid_convolution_backward_input_valid) { + const int num_batches = 13; + const int input_depth = 2; + const int input_planes = 14; + const int input_rows = 13; + const int input_cols = 15; + const int patch_rows = 3; + const int patch_cols = 2; + const int patch_planes = 4; + const int stride_rows = 3; + const int stride_cols = 2; + const int stride_planes = 3; + const int output_rows = ceil_div(input_rows - patch_rows + 1, stride_rows); + const int output_cols = ceil_div(input_cols - patch_cols + 1, stride_cols); + const int output_planes = + ceil_div(input_planes - patch_planes + 1, stride_planes); + const int output_depth = 5; + + Tensor<float, 5> input_backward(input_depth, input_planes, input_rows, + input_cols, num_batches); + Tensor<float, 5> kernel(output_depth, input_depth, patch_planes, patch_rows, + patch_cols); + Tensor<float, 5> output_backward(output_depth, output_planes, output_rows, + output_cols, num_batches); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + 
input_backward.setRandom(); + + input_backward = CuboidConvolutionBackwardInput( + kernel, output_backward, input_planes, input_rows, input_cols, + stride_planes, stride_rows, stride_cols); + + EXPECT_EQ(input_backward.dimension(4), num_batches); + EXPECT_EQ(input_backward.dimension(3), input_cols); + EXPECT_EQ(input_backward.dimension(2), input_rows); + EXPECT_EQ(input_backward.dimension(1), input_planes); + EXPECT_EQ(input_backward.dimension(0), input_depth); + + for (int b = 0; b < num_batches; ++b) { + for (int id = 0; id < input_depth; ++id) { + for (int i = 0; i < input_planes; ++i) { + for (int j = 0; j < input_rows; ++j) { + for (int k = 0; k < input_cols; ++k) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int p = 0; p < patch_planes; ++p) { + for (int od = 0; od < output_depth; ++od) { + int output_j = j - r; + int output_k = k - c; + int output_i = i - p; + if (output_i >= 0 && + output_i / stride_planes < output_planes && + output_j >= 0 && output_j / stride_rows < output_rows && + output_k >= 0 && output_k / stride_cols < output_cols && + output_i % stride_planes == 0 && + output_j % stride_rows == 0 && + output_k % stride_cols == 0) { + expected += output_backward(od, output_i / stride_planes, + output_j / stride_rows, + output_k / stride_cols, b) * + kernel(od, id, p, r, c); + } + } + } + } + } + EigenApprox(input_backward(id, i, j, k, b), expected); + } + } + } + } + } +} + +TEST(EigenBackwardSpatialConvolutionsTest, + test_batched_strided_cuboid_convolution_backward_input_valid_row_major) { + const int num_batches = 13; + const int input_depth = 2; + const int input_planes = 14; + const int input_rows = 13; + const int input_cols = 15; + const int patch_rows = 3; + const int patch_cols = 2; + const int patch_planes = 4; + const int stride_rows = 3; + const int stride_cols = 2; + const int stride_planes = 3; + const int output_rows = ceil_div(input_rows - patch_rows + 1, 
stride_rows); + const int output_cols = ceil_div(input_cols - patch_cols + 1, stride_cols); + const int output_planes = + ceil_div(input_planes - patch_planes + 1, stride_planes); + const int output_depth = 5; + + Tensor<float, 5, RowMajor> input_backward(num_batches, input_cols, input_rows, + input_planes, input_depth); + Tensor<float, 5, RowMajor> kernel(patch_cols, patch_rows, patch_planes, + input_depth, output_depth); + Tensor<float, 5, RowMajor> output_backward( + num_batches, output_cols, output_rows, output_planes, output_depth); + + output_backward = output_backward.constant(11.0f) + output_backward.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + input_backward.setRandom(); + + input_backward = CuboidConvolutionBackwardInput( + kernel, output_backward, input_planes, input_rows, input_cols, + stride_planes, stride_rows, stride_cols); + + EXPECT_EQ(input_backward.dimension(0), num_batches); + EXPECT_EQ(input_backward.dimension(1), input_cols); + EXPECT_EQ(input_backward.dimension(2), input_rows); + EXPECT_EQ(input_backward.dimension(3), input_planes); + EXPECT_EQ(input_backward.dimension(4), input_depth); + + for (int b = 0; b < num_batches; ++b) { + for (int id = 0; id < input_depth; ++id) { + for (int i = 0; i < input_planes; ++i) { + for (int j = 0; j < input_rows; ++j) { + for (int k = 0; k < input_cols; ++k) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int p = 0; p < patch_planes; ++p) { + for (int od = 0; od < output_depth; ++od) { + int output_j = j - r; + int output_k = k - c; + int output_i = i - p; + if (output_i >= 0 && + output_i / stride_planes < output_planes && + output_j >= 0 && output_j / stride_rows < output_rows && + output_k >= 0 && output_k / stride_cols < output_cols && + output_i % stride_planes == 0 && + output_j % stride_rows == 0 && + output_k % stride_cols == 0) { + expected += + output_backward(b, output_k / stride_cols, + output_j / 
stride_rows, + output_i / stride_planes, od) * + kernel(c, r, p, id, od); + } + } + } + } + } + EigenApprox(input_backward(b, k, j, i, id), expected); + } + } + } + } + } +} + +} // namespace Eigen diff --git a/tensorflow/core/kernels/eigen_cuboid_convolution.h b/tensorflow/core/kernels/eigen_cuboid_convolution.h new file mode 100644 index 0000000000..ed4c3fca1a --- /dev/null +++ b/tensorflow/core/kernels/eigen_cuboid_convolution.h @@ -0,0 +1,195 @@ +/* Copyright 2015 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_CUBOID_CONVOLUTION_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_CUBOID_CONVOLUTION_H_ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/kernels/eigen_patch_3d.h" + +namespace Eigen { + +/** CuboidConvolution + * \ingroup CXX11_NeuralNetworks_Module + * + * \brief Applies a 3D convolution over a multichannel input voxel block. + * + * The input parameter is expected to be a tensor with a rank of 4 or more (channels, depth, height, width, and optionally others). + * The kernel parameter is expected to be a 5D tensor (filters, channels, kernel_depth, kernel_height, kernel_width). + * The result can be assigned to a tensor of rank equal to the rank of the input. The dimensions of the result will be filters, depth, height, width (and others if applicable). 
+ * + * The input and kernel have to be in the same layout, and both row-major and + * col-major are supported. The shapes given above are for col-major layout. + * For row-major, all dimensions should be reversed. + * + * It is possible to swap the order of the depth, width, and height dimensions provided that the same order is used in the input, the kernel, and the output. + * + * The stridePlanes, strideRows and strideCols parameters give the stride used when extracting patches along each spatial dimension, and padding_type selects between PADDING_VALID and PADDING_SAME when computing the output extents. + */ +template <typename Input, typename Kernel> +EIGEN_ALWAYS_INLINE +static const typename internal::conditional < + internal::traits<Input>::Layout == ColMajor, + TensorReshapingOp< + const DSizes<typename internal::traits<Input>::Index, + internal::traits<Input>::NumDimensions>, + const TensorContractionOp< + const array<IndexPair<typename internal::traits<Input>::Index>, 1>, + const TensorReshapingOp< + const DSizes<typename internal::traits<Input>::Index, 2>, + const Kernel>, + const TensorReshapingOp< + const DSizes<typename internal::traits<Input>::Index, 2>, + const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, + const Input> > > >, + TensorReshapingOp< + const DSizes<typename internal::traits<Input>::Index, + internal::traits<Input>::NumDimensions>, + const TensorContractionOp< + const array<IndexPair<typename internal::traits<Input>::Index>, 1>, + const TensorReshapingOp< + const DSizes<typename internal::traits<Input>::Index, 2>, + const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, + const Input> > , + const TensorReshapingOp< + const DSizes<typename internal::traits<Input>::Index, 2>, + const Kernel> > > >::type +CuboidConvolution(const Input& input, const Kernel& kernel, + const DenseIndex stridePlanes = 1, + const DenseIndex strideRows = 1, + const DenseIndex strideCols = 1, + const PaddingType padding_type = PADDING_SAME) { + typedef typename internal::traits<Input>::Index TensorIndex; + TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input); + TensorRef<Tensor<typename 
internal::traits<Kernel>::Scalar, internal::traits<Kernel>::NumDimensions, internal::traits<Kernel>::Layout, TensorIndex> > kern(kernel); + + EIGEN_STATIC_ASSERT(internal::traits<Input>::Layout == internal::traits<Kernel>::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE); + static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor); + static const int NumDims = internal::traits<Input>::NumDimensions; + + // Number of filters to apply. This is the same as the output depth of the result. + const TensorIndex kernelFilters = isColMajor ? kern.dimensions()[0] : kern.dimensions()[4]; + const TensorIndex kernelChannels = isColMajor ? kern.dimensions()[1] : kern.dimensions()[3]; + + // Spatial size of the kernel. The depth sits at index 2 of the 5D kernel in + // both layouts, so no layout switch is needed for it. + const TensorIndex kernelDepth = kern.dimensions()[2]; + const TensorIndex kernelRows = isColMajor ? kern.dimensions()[3] : kern.dimensions()[1]; + const TensorIndex kernelCols = isColMajor ? kern.dimensions()[4] : kern.dimensions()[0]; + + if (isColMajor) { + eigen_assert(kernelChannels == in.dimension(0)); + } else { + eigen_assert(kernelChannels == in.dimension(NumDims - 1)); + } + + const TensorIndex inputPlanes = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2); + const TensorIndex inputRows = isColMajor ? in.dimension(2) : in.dimension(NumDims - 3); + const TensorIndex inputCols = isColMajor ? 
in.dimension(3) : in.dimension(NumDims - 4); + + const float stride_planes_f = static_cast<float>(stridePlanes); + const float stride_rows_f = static_cast<float>(strideRows); + const float stride_cols_f = static_cast<float>(strideCols); + TensorIndex out_depth; + TensorIndex out_height; + TensorIndex out_width; + switch (padding_type) { + case PADDING_VALID: + out_depth = ceil((inputPlanes - kernelDepth + 1.f) / stride_planes_f); + out_height = ceil((inputRows - kernelRows + 1.f) / stride_rows_f); + out_width = ceil((inputCols - kernelCols + 1.f) / stride_cols_f); + break; + case PADDING_SAME: + out_depth = ceil(inputPlanes / stride_planes_f); + out_height = ceil(inputRows / stride_rows_f); + out_width = ceil(inputCols / stride_cols_f); + break; + default: + // Keep the output extents defined even when eigen_assert is compiled + // out; otherwise the reshape dimensions below read uninitialized values. + out_depth = 0; + out_height = 0; + out_width = 0; + eigen_assert(false && "unexpected padding"); + } + + DSizes<TensorIndex, 2> kernel_dims; + if (isColMajor) { + kernel_dims[0] = kernelFilters; + kernel_dims[1] = kernelChannels * kernelDepth * kernelRows * kernelCols; + } else { + kernel_dims[0] = kernelChannels * kernelDepth * kernelRows * kernelCols; + kernel_dims[1] = kernelFilters; + } + + // Molds the output of the patch extraction result into a 2D tensor: + // - the first dimension (dims[0]): the patch values to be multiplied with the kernels + // - the second dimension (dims[1]): everything else + DSizes<TensorIndex, 2> pre_contract_dims; + if (isColMajor) { + pre_contract_dims[0] = kernelChannels * kernelDepth * kernelRows * kernelCols; + pre_contract_dims[1] = out_depth * out_height * out_width; + for (int i = 4; i < NumDims; ++i) { + pre_contract_dims[1] *= in.dimension(i); + } + } else { + pre_contract_dims[1] = kernelChannels * kernelDepth * kernelRows * kernelCols; + pre_contract_dims[0] = out_depth * out_height * out_width; + for (int i = 0; i < NumDims - 4; ++i) { + pre_contract_dims[0] *= in.dimension(i); + } + } + + array<IndexPair<TensorIndex>, 1> contract_dims; + contract_dims[0] = IndexPair<TensorIndex>(1, 0); + + // Molds the output of the 
contraction into the shape expected by the user + // (assuming ColMajor): + // - 1st dim: kernel filters + // - 2nd dim: output depth + // - 3rd dim: output height + // - 4th dim: output width + // - 5th dim and beyond: everything else including batch size + DSizes<TensorIndex, NumDims> post_contract_dims; + if (isColMajor) { + post_contract_dims[0] = kernelFilters; + post_contract_dims[1] = out_depth; + post_contract_dims[2] = out_height; + post_contract_dims[3] = out_width; + for (int i = 4; i < NumDims; ++i) { + post_contract_dims[i] = in.dimension(i); + } + } else { + post_contract_dims[NumDims - 1] = kernelFilters; + post_contract_dims[NumDims - 2] = out_depth; + post_contract_dims[NumDims - 3] = out_height; + post_contract_dims[NumDims - 4] = out_width; + for (int i = 0; i < NumDims - 4; ++i) { + post_contract_dims[i] = in.dimension(i); + } + } + + return choose( + Cond<internal::traits<Input>::Layout == ColMajor>(), + kernel.reshape(kernel_dims) + .contract(input.extract_volume_patches( + kernelDepth, kernelRows, kernelCols, stridePlanes, + strideRows, strideCols, padding_type) + .reshape(pre_contract_dims), + contract_dims) + .reshape(post_contract_dims), + input.extract_volume_patches(kernelDepth, kernelRows, kernelCols, + stridePlanes, strideRows, strideCols, + padding_type) + .reshape(pre_contract_dims) + .contract(kernel.reshape(kernel_dims), contract_dims) + .reshape(post_contract_dims)); +} + +} // end namespace Eigen + +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_CUBOID_CONVOLUTION_H_ diff --git a/tensorflow/core/kernels/eigen_patch_3d.h b/tensorflow/core/kernels/eigen_patch_3d.h new file mode 100644 index 0000000000..900d406709 --- /dev/null +++ b/tensorflow/core/kernels/eigen_patch_3d.h @@ -0,0 +1,257 @@ +/* Copyright 2015 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_PATCH_3D_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_PATCH_3D_H_ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +#if not defined(__CUDACC__) +#include <type_traits> +#endif + +namespace Eigen { +namespace internal { + +/** Extract3DPatches + * \ingroup CXX11_NeuralNetworks_Module + * + * \brief Extracts 3D patches from a multichannel input volume. + * + * The input parameter is expected to be a tensor with a rank of 4 or more + * (channels, depth, height, width, optional others in col-major, and the + * reverse order in row-major). + * + * The return value will be a tensor with 3 more dimensions than the input tensor. + * In col-major, the first 4 dimensions of the result are: channels, patch_depth, + * patch_height, patch_width. The next dimensions will identify the patch + * position on the 3D grid of extracted patches: z, y, x. The remaining + * dimensions, if any, will be the same as the 'other' dimensions of the input + * tensor. 
+ */ + +template <typename Input> +EIGEN_ALWAYS_INLINE static const TensorStridingOp< + const array<typename internal::traits<Input>::Index, + internal::traits<Input>::NumDimensions + 3>, + const TensorReshapingOp< + const DSizes<typename internal::traits<Input>::Index, + internal::traits<Input>::NumDimensions + 3>, + const TensorPatchOp< + const DSizes<typename internal::traits<Input>::Index, + internal::traits<Input>::NumDimensions>, + const TensorPaddingOp< + const array<IndexPair<typename internal::traits<Input>::Index>, + internal::traits<Input>::NumDimensions>, + const Input> > > > +Extract3DPatches( + const Input& input, const DenseIndex patchPlanes, + const DenseIndex patchRows, const DenseIndex patchCols, + const DenseIndex stridePlanes, const DenseIndex strideRows, + const DenseIndex strideCols, + const DenseIndex paddingZTop, const DenseIndex paddingZBottom, + const DenseIndex paddingTop, const DenseIndex paddingBottom, + const DenseIndex paddingLeft, const DenseIndex paddingRight, + const typename internal::traits<Input>::Scalar padding_value = 0) { + + typedef typename internal::traits<Input>::Index TensorIndex; + TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input); + + EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions >= 4, YOU_MADE_A_PROGRAMMING_MISTAKE); + + static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor); + static const int NumDims = internal::traits<Input>::NumDimensions; + static const int ExtDims = NumDims + 3; + + // Tensor size after patch extraction. We add three dimensions to unpack the + // linear patch index into a 3D grid over which stride() can work. 
+ DSizes<TensorIndex, ExtDims> pre_stride_dims; + + if (isColMajor) { + pre_stride_dims[0] = in.dimension(0); + pre_stride_dims[1] = patchPlanes; + pre_stride_dims[2] = patchRows; + pre_stride_dims[3] = patchCols; + } else { + pre_stride_dims[ExtDims - 1] = in.dimension(NumDims - 1); + pre_stride_dims[ExtDims - 4] = patchCols; + pre_stride_dims[ExtDims - 3] = patchRows; + pre_stride_dims[ExtDims - 2] = patchPlanes; + } + + const TensorIndex inputPlanes = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2); + const TensorIndex inputRows = isColMajor ? in.dimension(2) : in.dimension(NumDims - 3); + const TensorIndex inputCols = isColMajor ? in.dimension(3) : in.dimension(NumDims - 4); + + array<IndexPair<TensorIndex>, NumDims> paddings; + for (int i = 0; i < NumDims; ++i) { + paddings[i] = IndexPair<TensorIndex>(0, 0); + } + + paddings[isColMajor ? 1 : (NumDims - 2)] = IndexPair<TensorIndex>(paddingZTop, paddingZBottom); + paddings[isColMajor ? 2 : (NumDims - 3)] = IndexPair<TensorIndex>(paddingTop, paddingBottom); + paddings[isColMajor ? 3 : (NumDims - 4)] = IndexPair<TensorIndex>(paddingLeft, paddingRight); + + pre_stride_dims[isColMajor ? 4 : (ExtDims - 5)] = inputPlanes + paddingZBottom + paddingZTop - patchPlanes + 1; + pre_stride_dims[isColMajor ? 5 : (ExtDims - 6)] = inputRows + paddingTop + paddingBottom - patchRows + 1; + pre_stride_dims[isColMajor ? 
6 : (ExtDims - 7)] = inputCols + paddingLeft + paddingRight - patchCols + 1; + + if (isColMajor) { + for (int i = 7; i < NumDims + 3; ++i) { + pre_stride_dims[i] = in.dimension(i - 3); + } + } else { + for (int i = 0; i < NumDims - 4; ++i) { + pre_stride_dims[i] = in.dimension(i); + } + } + + DSizes<TensorIndex, NumDims> patch_dims; + if (isColMajor) { + patch_dims[0] = in.dimension(0); + patch_dims[1] = patchPlanes; + patch_dims[2] = patchRows; + patch_dims[3] = patchCols; + for (int i = 4; i < NumDims; ++i) { + patch_dims[i] = 1; + } + } else { + patch_dims[NumDims - 1] = in.dimension(NumDims - 1); + patch_dims[NumDims - 4] = patchCols; + patch_dims[NumDims - 3] = patchRows; + patch_dims[NumDims - 2] = patchPlanes; + for (int i = 0; i < NumDims - 4; i++) { + patch_dims[i] = 1; + } + } + + array<TensorIndex, NumDims + 3> strides; + if (isColMajor) { + // No striding within the patches. + for (int i = 0; i < 4; ++i) { + strides[i] = 1; + } + // Apply striding in the spatial patch grid dimensions only. + strides[4] = stridePlanes; + strides[5] = strideRows; + strides[6] = strideCols; + // No striding in the remaining dimensions (batches, ...). + for (int i = 7; i < NumDims + 3; i++) { + strides[i] = 1; + } + } else { + // No striding within the patches. + for (int i = 1; i <= 4; ++i) { + strides[ExtDims - i] = 1; + } + // Apply striding in the spatial patch grid dimensions only. + strides[ExtDims - 7] = strideCols; + strides[ExtDims - 6] = strideRows; + strides[ExtDims - 5] = stridePlanes; + // No striding in the remaining dimensions (batches, ...). + for (int i = 0; i < NumDims - 4; i++) { + strides[i] = 1; + } + } + + // TODO(mjanusz): Consider getting rid of pad(), and stride() and extend + // extract_patches to take additional parameters for padding/striding, + // similarly to extract_image_patches. 
+ return input.pad(paddings, padding_value).extract_patches(patch_dims).reshape(pre_stride_dims).stride(strides); +} + + +template <typename Input> +EIGEN_ALWAYS_INLINE static const TensorStridingOp< + const array<typename internal::traits<Input>::Index, + internal::traits<Input>::NumDimensions + 3>, + const TensorReshapingOp< + const DSizes<typename internal::traits<Input>::Index, + internal::traits<Input>::NumDimensions + 3>, + const TensorPatchOp< + const DSizes<typename internal::traits<Input>::Index, + internal::traits<Input>::NumDimensions>, + const TensorPaddingOp< + const array<IndexPair<typename internal::traits<Input>::Index>, + internal::traits<Input>::NumDimensions>, + const Input> > > > +Extract3DPatches( + const Input& input, const DenseIndex patchPlanes, + const DenseIndex patchRows, const DenseIndex patchCols, + const DenseIndex stridePlanes, const DenseIndex strideRows, + const DenseIndex strideCols, const PaddingType padding_type, + const typename internal::traits<Input>::Scalar padding_value = 0) { + typedef typename internal::traits<Input>::Index TensorIndex; + TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input); + + EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions >= 4, YOU_MADE_A_PROGRAMMING_MISTAKE); + + static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor); + static const int NumDims = internal::traits<Input>::NumDimensions; + + const TensorIndex inputPlanes = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2); + const TensorIndex inputRows = isColMajor ? in.dimension(2) : in.dimension(NumDims - 3); + const TensorIndex inputCols = isColMajor ? in.dimension(3) : in.dimension(NumDims - 4); + + switch (padding_type) { + case PADDING_VALID: + // No padding in any dimension. 
+ return Extract3DPatches(input, patchPlanes, patchRows, patchCols, + stridePlanes, strideRows, strideCols, + 0, 0, 0, 0, 0, 0, padding_value); + case PADDING_SAME: { + // The side of the tensor before striding should be just the expected + // output times the stride. + const TensorIndex size_z = ceil(inputPlanes / static_cast<float>(stridePlanes)) * stridePlanes; + const TensorIndex size_y = ceil(inputRows / static_cast<float>(strideRows)) * strideRows; + const TensorIndex size_x = ceil(inputCols / static_cast<float>(strideCols)) * strideCols; + + // The size of the patch space is going to be: padded_input_size - patch_size + 1. + // This has to match the expected size before striding (pre_stride_dims). + // The deltas below extend the input to the expected size. + const TensorIndex dz = size_z + patchPlanes - 1 - inputPlanes; + const TensorIndex dy = size_y + patchRows - 1 - inputRows; + const TensorIndex dx = size_x + patchCols - 1 - inputCols; + + return Extract3DPatches(input, patchPlanes, patchRows, patchCols, + stridePlanes, strideRows, strideCols, + dz - dz / 2, dz / 2, + dy - dy / 2, dy / 2, + dx - dx / 2, dx / 2, + padding_value); + } + default: + eigen_assert(false && "unexpected padding"); + // unreachable code to avoid missing return warning. + return Extract3DPatches(input, patchPlanes, patchRows, patchCols, + stridePlanes, strideRows, strideCols, + 0, 0, 0, 0, 0, 0, padding_value); + } +} + +// TODO(mjanusz): Switch this to a 'using' alias once CUDA supports C++11. 
+template <typename Input> +struct Extract3DPatchesType { + typedef const TensorStridingOp< const array<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions + 3>, + const TensorReshapingOp< const DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions + 3>, + const TensorPatchOp< const DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions>, + const TensorPaddingOp< const array< IndexPair<typename internal::traits<Input>::Index>, internal::traits<Input>::NumDimensions>, + const Input> > > > type; +}; + +} // end namespace internal +} // end namespace Eigen + +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_PATCH_3D_H_ diff --git a/tensorflow/core/kernels/eigen_pooling.h b/tensorflow/core/kernels/eigen_pooling.h new file mode 100644 index 0000000000..7ded806b74 --- /dev/null +++ b/tensorflow/core/kernels/eigen_pooling.h @@ -0,0 +1,441 @@ +/* Copyright 2015 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" +#include "tensorflow/core/kernels/eigen_patch_3d.h" + +namespace Eigen { + +/** SpatialMaxPooling + * \ingroup CXX11_NeuralNetworks_Module + * + * \brief Applies a max-pooling over a multichannel input image. + * + * The input parameter is expected to be a tensor with a rank of 4 (channels, height, width, others in col-major, and the reverse of that in row-major). + * + * The result can be assigned to a tensor of rank equal to the rank of the input. The dimensions of the result will be channels, height, width, and others (in col-major, and the reverse of that if the input was row-major). + * + * The order of the width and height dimensions can be swapped if needed. + * +*/ +#if !defined(EIGEN_HAS_INDEX_LIST) +template <typename Input> +EIGEN_ALWAYS_INLINE +static const TensorReshapingOp<const Eigen::DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions>, const TensorReductionOp<internal::MaxReducer<typename internal::remove_const<typename internal::traits<Input>::Scalar>::type>, const Eigen::array<int, 2>, const TensorImagePatchOp<Dynamic, Dynamic, const Input> > > +#else +template <typename Input> +EIGEN_ALWAYS_INLINE +static const TensorReshapingOp<const Eigen::DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions>, const TensorReductionOp<internal::MaxReducer<typename internal::remove_const<typename internal::traits<Input>::Scalar>::type>, typename internal::conditional<internal::traits<Input>::Layout == ColMajor, const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >, const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3> > >::type, const TensorImagePatchOp<Dynamic, Dynamic, const Input> > > +#endif 
+SpatialMaxPooling(const Input& input, DenseIndex patchRows, DenseIndex patchCols, + DenseIndex strideRows, DenseIndex strideCols, const PaddingType padding_type, + DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) +{ + EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4, YOU_MADE_A_PROGRAMMING_MISTAKE); + + typedef typename internal::traits<Input>::Index TensorIndex; + TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input); + + const DenseIndex patchRowsEff = patchRows + (patchRows - 1) * (in_strideRows - 1); + const DenseIndex patchColsEff = patchCols + (patchCols - 1) * (in_strideCols - 1); + + static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor); + static const int idxRows = isColMajor ? 1 : 2; + static const int idxCols = isColMajor ? 2 : 1; + + // Molds the output of the reduction into the shape expected by the user. + // (assuming col-major): + // - 1st dim: channels + // - 2nd dim: output height + // - 3rd dim: output width + // - 4th dim and beyond: everything else including batch size + Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions> post_reduce_dims; + post_reduce_dims[0] = in.dimension(0); + if (padding_type == PADDING_VALID) { + post_reduce_dims[idxRows] = numext::ceil((in.dimension(idxRows) - patchRowsEff + 1.f) / static_cast<float>(strideRows)); + post_reduce_dims[idxCols] = numext::ceil((in.dimension(idxCols) - patchColsEff + 1.f) / static_cast<float>(strideCols)); + } else { + post_reduce_dims[idxRows] = numext::ceil(in.dimension(idxRows) / static_cast<float>(strideRows)); + post_reduce_dims[idxCols] = numext::ceil(in.dimension(idxCols) / static_cast<float>(strideCols)); + } + post_reduce_dims[3] = in.dimension(3); + +#if !defined(EIGEN_HAS_INDEX_LIST) + // nvcc doesn't support cxx11 + Eigen::array<int, 2> reduction_dims; + if (isColMajor) { + reduction_dims[0] = 1; + 
reduction_dims[1] = 2; + } else { + reduction_dims[0] = 2; + reduction_dims[1] = 3; + } +#else + // Take advantage of cxx11 to give the compiler information it can use to + // optimize the code. + typename internal::conditional<internal::traits<Input>::Layout == ColMajor, const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >, const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3> > >::type reduction_dims; +#endif + + return input.extract_image_patches(patchRows, patchCols, strideRows, strideCols, in_strideRows, in_strideCols, padding_type, -Eigen::NumTraits<typename internal::remove_const<typename internal::traits<Input>::Scalar>::type>::highest()).maximum(reduction_dims).reshape(post_reduce_dims); +} + +/** CuboidMaxPooling + * \ingroup CXX11_NeuralNetworks_Module + * + * \brief Applies a max-pooling over a multichannel input volume. + * + * The input parameter is expected to be a tensor with a rank of 5 (channels, depth, height, width, others in col-major, and the reverse of that in row-major). + * + * The result can be assigned to a tensor of rank equal to the rank of the input. The dimensions of the result will be channels, depth, height, width, and others (in col-major, and the reverse of that if the input was row-major). + * + * The order of the depth, width and height dimensions can be swapped if needed. 
+ * +*/ +#if !defined(EIGEN_HAS_INDEX_LIST) +template <typename Input> +EIGEN_ALWAYS_INLINE static const TensorReshapingOp< + const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>, + const TensorReductionOp< + internal::MaxReducer<float>, const Eigen::array<int, 1>, + const TensorReshapingOp< + const Eigen::DSizes<DenseIndex, 3>, + const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Input> > > > +#else +template <typename Input> +EIGEN_ALWAYS_INLINE static const TensorReshapingOp< + const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>, + const TensorReductionOp< + internal::MaxReducer<float>, + const Eigen::IndexList<Eigen::type2index<1> >, + const TensorReshapingOp< + const Eigen::DSizes<DenseIndex, 3>, + const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Input> > > > +#endif +CuboidMaxPooling(const Input& input, DenseIndex patchPlanes, + DenseIndex patchRows, DenseIndex patchCols, + DenseIndex stridePlanes, DenseIndex strideRows, + DenseIndex strideCols, const PaddingType padding_type) { + EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5, YOU_MADE_A_PROGRAMMING_MISTAKE); + static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor); + + typedef typename internal::traits<Input>::Index TensorIndex; + TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input); + + static const int idxPlanes = isColMajor ? 1 : 3; + static const int idxRows = 2; + static const int idxCols = isColMajor ? 
3 : 1; + + // Molds the output of the reduction into the shape expected by the user + // (assuming col-major): + // - 1st dim: channels + // - 2nd dim: output depth + // - 3rd dim: output height + // - 4th dim: output width + // - 5th dim and beyond: everything else including batch size + Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions> post_reduce_dims; + post_reduce_dims[0] = in.dimension(0); + if (padding_type == PADDING_VALID) { + post_reduce_dims[idxPlanes] = numext::ceil((in.dimension(idxPlanes) - patchPlanes + 1.f) / static_cast<float>(stridePlanes)); + post_reduce_dims[idxRows] = numext::ceil((in.dimension(idxRows) - patchRows + 1.f) / static_cast<float>(strideRows)); + post_reduce_dims[idxCols] = numext::ceil((in.dimension(idxCols) - patchCols + 1.f) / static_cast<float>(strideCols)); + } else { + post_reduce_dims[idxPlanes] = numext::ceil(in.dimension(idxPlanes) / static_cast<float>(stridePlanes)); + post_reduce_dims[idxRows] = numext::ceil(in.dimension(idxRows) / static_cast<float>(strideRows)); + post_reduce_dims[idxCols] = numext::ceil(in.dimension(idxCols) / static_cast<float>(strideCols)); + } + post_reduce_dims[4] = in.dimension(4); + + // Flatten every patch into the single middle dimension (index 1) so that + // one reduction over that dimension collapses the whole patch. + Eigen::DSizes<DenseIndex, 3> pre_reduce_dims; + pre_reduce_dims[1] = patchRows * patchCols * patchPlanes; + if (isColMajor) { + pre_reduce_dims[0] = post_reduce_dims[0]; + pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] * post_reduce_dims[3] * post_reduce_dims[4]; + } else { + pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] * post_reduce_dims[2] * post_reduce_dims[3]; + pre_reduce_dims[2] = post_reduce_dims[4]; + } + +#if !defined(EIGEN_HAS_INDEX_LIST) + // nvcc doesn't support cxx11 + Eigen::array<int, 1> reduction_dims; + reduction_dims[0] = 1; +#else + // Take advantage of cxx11 to give the compiler information it can use to + // optimize the code. 
+ Eigen::IndexList<Eigen::type2index<1> > reduction_dims; +#endif + return input.extract_volume_patches(patchPlanes, patchRows, patchCols, + stridePlanes, strideRows, strideCols, + padding_type, -Eigen::NumTraits<float>::highest()) + .reshape(pre_reduce_dims) + .maximum(reduction_dims) + .reshape(post_reduce_dims); +} + + +/** SpatialAvgPooling + * \ingroup CXX11_NeuralNetworks_Module + * + * \brief Applies an average pooling over a multichannel input image. + * + * The input parameter is expected to be a tensor with a rank of 4 (channels, height, width, others in col-major, and the reverse of that in row-major). + * + * The result can be assigned to a tensor of rank equal to the rank of the input. The dimensions of the result will be channels, height, width, and others (in col-major, and the reverse of that if the input was row-major). + * + * The order of the width and height dimensions can be swapped if needed. + * +*/ +namespace internal { + /* AvgPoolMeanReducer sums every element that differs from -NumTraits<T>::highest() and divides by the number of elements it actually summed; elements equal to that sentinel are skipped and not counted. */ +template <typename T> struct AvgPoolMeanReducer +{ +#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) + // We only support packet access for floats. 
+ static const bool PacketAccess = internal::is_same<T, float>::value; +#else + static const bool PacketAccess = false; +#endif + static const bool IsStateful = true; + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE AvgPoolMeanReducer() : scalarCount_(0) { + typedef typename packet_traits<T>::type Packet; + packetCount_ = pset1<Packet>(0.0); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) { + if (t != -Eigen::NumTraits<T>::highest()) { + (*accum) = (*accum) + t; + scalarCount_++; + } + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { + return static_cast<T>(0); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { + eigen_assert(scalarCount_ > 0); + return accum / scalarCount_; + } + +#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) +#ifdef EIGEN_VECTORIZE_AVX +#define pequal(a,b) _mm256_cmp_ps(a,b,_CMP_EQ_UQ) +#define psel(a,b,false_mask) _mm256_blendv_ps(a,b,false_mask) +#else +#define pequal(a,b) _mm_cmpeq_ps(a,b) +#define psel(a,b,false_mask) _mm_or_ps(_mm_andnot_ps(false_mask, a), _mm_and_ps(false_mask, b)) +#endif /* NOTE(review): pequal/psel are not #undef'd later in this header, so they remain defined for every file that includes it. */ + + template <typename Packet> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) { + reducePacketWithType(static_cast<T>(0), p, accum); + } + + template <typename Packet> + void reducePacketWithType(T, const Packet& p, Packet* accum) { + Packet skip_mask = pequal(p, pset1<Packet>(-Eigen::NumTraits<T>::highest())); + (*accum) = padd<Packet>(*accum, psel(p, pset1<Packet>(0), skip_mask)); + packetCount_ = padd<Packet>(packetCount_, psel(pset1<Packet>(1), pset1<Packet>(0), skip_mask)); + } + + template <typename Packet> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { + return pset1<Packet>(0); + } + + template <typename Packet> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { + return pdiv(vaccum, packetCount_); + } + template <typename Packet> + 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { + return (saccum + predux(vaccum)) / (scalarCount_ + predux(packetCount_)); + } +#endif + + protected: + typedef typename packet_traits<T>::type Packet; + int scalarCount_; + Packet packetCount_; +}; + +} // namespace internal + +#if !defined(EIGEN_HAS_INDEX_LIST) +template <typename Input> +EIGEN_ALWAYS_INLINE +static const TensorReshapingOp<const Eigen::DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions>, const TensorReductionOp<internal::AvgPoolMeanReducer<typename internal::remove_const<typename internal::traits<Input>::Scalar>::type>, const Eigen::array<int, 2>, const TensorImagePatchOp<Dynamic, Dynamic, const Input> > > +#else +template <typename Input> +EIGEN_ALWAYS_INLINE +static const TensorReshapingOp<const Eigen::DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions>, const TensorReductionOp<internal::AvgPoolMeanReducer<typename internal::remove_const<typename internal::traits<Input>::Scalar>::type>, typename internal::conditional<internal::traits<Input>::Layout == ColMajor, const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >, const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3> > >::type, const TensorImagePatchOp<Dynamic, Dynamic, const Input> > > +#endif +SpatialAvgPooling(const Input& input, DenseIndex patchRows, DenseIndex patchCols, + DenseIndex strideRows, DenseIndex strideCols, const PaddingType padding_type, + DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) +{ + EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4, YOU_MADE_A_PROGRAMMING_MISTAKE); + + typedef typename internal::traits<Input>::Index TensorIndex; + TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input); + /* Effective patch extents: patch + (patch - 1) * (in_stride - 1), which equals (patch - 1) * in_stride + 1; presumably the span a patch covers on the input when its elements are in_stride apart (confirm against extract_image_patches). */ + const DenseIndex patchRowsEff = patchRows + (patchRows - 1) 
* (in_strideRows - 1); + const DenseIndex patchColsEff = patchCols + (patchCols - 1) * (in_strideCols - 1); + + static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor); + static const int idxRows = isColMajor ? 1 : 2; + static const int idxCols = isColMajor ? 2 : 1; + + // Molds the output of the reduction into the shape expected by the user. + // (assuming col-major): + // - 1st dim: channels + // - 2nd dim: output height + // - 3rd dim: output width + // - 4th dim and beyond: everything else including batch size + Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions> post_reduce_dims; + post_reduce_dims[0] = in.dimension(0); + if (padding_type == PADDING_VALID) { + post_reduce_dims[idxRows] = numext::ceil((in.dimension(idxRows) - patchRowsEff + 1.f) / static_cast<float>(strideRows)); + post_reduce_dims[idxCols] = numext::ceil((in.dimension(idxCols) - patchColsEff + 1.f) / static_cast<float>(strideCols)); + } else { + post_reduce_dims[idxRows] = numext::ceil(in.dimension(idxRows) / static_cast<float>(strideRows)); + post_reduce_dims[idxCols] = numext::ceil(in.dimension(idxCols) / static_cast<float>(strideCols)); + } + post_reduce_dims[3] = in.dimension(3); + + typedef typename internal::remove_const<typename internal::traits<Input>::Scalar>::type CoeffReturnType; + internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan; /* despite the name, this reducer skips values equal to -NumTraits<T>::highest() (the value passed to extract_image_patches below), not NaN */ + +#if !defined(EIGEN_HAS_INDEX_LIST) + // nvcc doesn't support cxx11 + Eigen::array<int, 2> reduction_dims; + if (isColMajor) { + reduction_dims[0] = 1; + reduction_dims[1] = 2; + } else { + reduction_dims[0] = 2; + reduction_dims[1] = 3; + } +#else + // Take advantage of cxx11 to give the compiler information it can use to + // optimize the code. 
+ typename internal::conditional<internal::traits<Input>::Layout == ColMajor, const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >, const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3> > >::type reduction_dims; +#endif + return input.extract_image_patches(patchRows, patchCols, strideRows, strideCols, in_strideRows, in_strideCols, padding_type, -Eigen::NumTraits<typename internal::remove_const<typename internal::traits<Input>::Scalar>::type>::highest()).reduce(reduction_dims, mean_with_nan).reshape(post_reduce_dims); +} + + +/** CuboidAvgPooling + * \ingroup CXX11_NeuralNetworks_Module + * + * \brief Applies an average pooling over a multichannel input volume. + * + * The input parameter is expected to be a tensor with a rank of 5 (channels, depth, height, width, others in col-major, and the reverse of that in row-major). + * + * The result can be assigned to a tensor of rank equal to the rank of the input. The dimensions of the result will be channels, depth, height, width, and others (in col-major, and the reverse of that if the input was row-major). + * + * The order of the depth, width and height dimensions can be swapped if needed. 
+ * + * NOTE(review): the declared return type names AvgPoolMeanReducer<float> and the padding value uses NumTraits<float>, while the body instantiates AvgPoolMeanReducer<CoeffReturnType>; the two only agree when the input scalar type is float. + * +*/ +#if !defined(EIGEN_HAS_INDEX_LIST) +template <typename Input> +EIGEN_ALWAYS_INLINE static const TensorReshapingOp< + const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>, + const TensorReductionOp< + internal::AvgPoolMeanReducer<float>, const Eigen::array<int, 1>, + const TensorReshapingOp< + const Eigen::DSizes<DenseIndex, 3>, + const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Input> > > > +#else +template <typename Input> +EIGEN_ALWAYS_INLINE static const TensorReshapingOp< + const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>, + const TensorReductionOp< + internal::AvgPoolMeanReducer<float>, + const Eigen::IndexList<Eigen::type2index<1> >, + const TensorReshapingOp< + const Eigen::DSizes<DenseIndex, 3>, + const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Input> > > > +#endif +CuboidAvgPooling(const Input& input, DenseIndex patchPlanes, + DenseIndex patchRows, DenseIndex patchCols, + DenseIndex stridePlanes, DenseIndex strideRows, + DenseIndex strideCols, const PaddingType padding_type) { + EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5, YOU_MADE_A_PROGRAMMING_MISTAKE); + static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor); + + typedef typename internal::traits<Input>::Index TensorIndex; + TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input); + + static const int idxPlanes = isColMajor ? 1 : 3; + static const int idxRows = 2; + static const int idxCols = isColMajor ? 
3 : 1; + // Molds the output of the reduction into the shape expected by the user + // (assuming col-major): + // - 1st dim: channels + // - 2nd dim: output depth + // - 3rd dim: output height + // - 4th dim: output width + // - 5th dim and beyond: everything else including batch size + Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions> post_reduce_dims; + post_reduce_dims[0] = in.dimension(0); + if (padding_type == PADDING_VALID) { + post_reduce_dims[idxPlanes] = numext::ceil((in.dimension(idxPlanes) - patchPlanes + 1.f) / static_cast<float>(stridePlanes)); + post_reduce_dims[idxRows] = numext::ceil((in.dimension(idxRows) - patchRows + 1.f) / static_cast<float>(strideRows)); + post_reduce_dims[idxCols] = numext::ceil((in.dimension(idxCols) - patchCols + 1.f) / static_cast<float>(strideCols)); + } else { + post_reduce_dims[idxPlanes] = numext::ceil(in.dimension(idxPlanes) / static_cast<float>(stridePlanes)); + post_reduce_dims[idxRows] = numext::ceil(in.dimension(idxRows) / static_cast<float>(strideRows)); + post_reduce_dims[idxCols] = numext::ceil(in.dimension(idxCols) / static_cast<float>(strideCols)); + } + post_reduce_dims[4] = in.dimension(4); + + Eigen::DSizes<DenseIndex, 3> pre_reduce_dims; + pre_reduce_dims[1] = patchRows * patchCols * patchPlanes; + if (isColMajor) { + pre_reduce_dims[0] = post_reduce_dims[0]; + pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] * post_reduce_dims[3] * post_reduce_dims[4]; + } else { + pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] * post_reduce_dims[2] * post_reduce_dims[3]; + pre_reduce_dims[2] = post_reduce_dims[4]; + } + + typedef typename internal::remove_const<typename internal::traits<Input>::Scalar>::type CoeffReturnType; + internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan; + +#if !defined(EIGEN_HAS_INDEX_LIST) + // nvcc doesn't support cxx11 + Eigen::array<int, 1> reduction_dims; + reduction_dims[0] = 1; +#else + // Take advantage of cxx11 to give the compiler 
information it can use to + // optimize the code. + Eigen::IndexList<Eigen::type2index<1> > reduction_dims; +#endif + return input.extract_volume_patches(patchPlanes, patchRows, patchCols, + stridePlanes, strideRows, strideCols, + padding_type, -Eigen::NumTraits<float>::highest()) + .reshape(pre_reduce_dims) + .reduce(reduction_dims, mean_with_nan) + .reshape(post_reduce_dims); +} + +} // end namespace Eigen + +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_POOLING_H_ diff --git a/tensorflow/core/kernels/eigen_pooling_test.cc b/tensorflow/core/kernels/eigen_pooling_test.cc new file mode 100644 index 0000000000..cf6957571f --- /dev/null +++ b/tensorflow/core/kernels/eigen_pooling_test.cc @@ -0,0 +1,742 @@ +/* Copyright 2015 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/kernels/eigen_pooling.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/platform/test.h" + +namespace Eigen { + +namespace { /* EigenApprox asserts |a - b| <= min(|a|, |b|) * 1e-3, i.e. agreement within a relative tolerance. NOTE(review): std::abs/std::min are used without including <cmath>/<algorithm> directly. */ +void EigenApprox(float a, float b) { + ASSERT_TRUE(std::abs(a - b) <= std::min(std::abs(a), std::abs(b)) * 1e-3); +} +} + +TEST(EigenPoolingTest, Simple) { + const int depth = 10; + const int input_rows = 5; + const int input_cols = 5; + const int num_batches = 13; + const int patch_rows = 4; + const int patch_cols = 4; + const int output_rows = 2; + const int output_cols = 2; + + Tensor<float, 4> input(depth, input_rows, input_cols, num_batches); + Tensor<float, 4> result(depth, output_rows, output_cols, num_batches); + input = input.constant(11.0f) + input.random(); + result.setRandom(); + result = result.constant(-1000.f); + + // Max pooling using a 4x4 window and a stride of 1. + const int stride = 1; + result = SpatialMaxPooling(input, patch_rows, patch_cols, stride, stride, + PADDING_VALID); + + EXPECT_EQ(result.dimension(0), depth); + EXPECT_EQ(result.dimension(1), output_rows); + EXPECT_EQ(result.dimension(2), output_cols); + EXPECT_EQ(result.dimension(3), num_batches); + + for (int b = 0; b < num_batches; ++b) { + for (int d = 0; d < depth; ++d) { + for (int i = 0; i < output_rows; ++i) { + for (int j = 0; j < output_cols; ++j) { + float expected = -10000.f; + for (int r = 0; r < patch_rows; ++r) { + for (int c = 0; c < patch_cols; ++c) { + expected = (std::max)(expected, input(d, r + i, c + j, b)); + } + } + if (result(d, i, j, b) != expected) { + std::cout << "at d=" << d << " b=" << b << " i=" << i << " j=" << j + << " " << result(d, i, j, b) << " vs " << expected + << std::endl; + } + EigenApprox(result(d, i, j, b), expected); + } + } + } + } +} + +TEST(EigenPoolingTest, SimpleRowMajor) { + const int depth = 10; + const int input_rows = 5; + const int input_cols = 5; + const int num_batches = 
13; + const int patch_rows = 4; + const int patch_cols = 4; + const int output_rows = 2; + const int output_cols = 2; + + Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows, depth); + Tensor<float, 4, RowMajor> result(num_batches, output_cols, output_rows, + depth); + input = input.constant(11.0f) + input.random(); + result.setRandom(); + result = result.constant(-1000.f); + + // Max pooling using a 4x4 window and a stride of 1. + const int stride = 1; + result = SpatialMaxPooling(input, patch_rows, patch_cols, stride, stride, + PADDING_VALID); + + EXPECT_EQ(result.dimension(3), depth); + EXPECT_EQ(result.dimension(2), output_rows); + EXPECT_EQ(result.dimension(1), output_cols); + EXPECT_EQ(result.dimension(0), num_batches); + + for (int b = 0; b < num_batches; ++b) { + for (int d = 0; d < depth; ++d) { + for (int i = 0; i < output_rows; ++i) { + for (int j = 0; j < output_cols; ++j) { + float expected = -10000.f; + for (int r = 0; r < patch_rows; ++r) { + for (int c = 0; c < patch_cols; ++c) { + expected = (std::max)(expected, input(b, c + j, r + i, d)); + } + } + if (result(b, j, i, d) != expected) { + std::cout << "at d=" << d << " b=" << b << " i=" << i << " j=" << j + << " " << result(b, j, i, d) << " vs " << expected + << std::endl; + } + EigenApprox(result(b, j, i, d), expected); + } + } + } + } +} + +TEST(EigenPoolingTest, Cuboid) { + const int channels = 10; + const int input_planes = 5; + const int input_rows = 5; + const int input_cols = 5; + const int num_batches = 13; + const int patch_rows = 4; + const int patch_cols = 3; + const int patch_planes = 2; + const int output_rows = 2; + const int output_cols = 3; + const int output_planes = 4; + + Tensor<float, 5> input(channels, input_planes, input_rows, input_cols, + num_batches); + Tensor<float, 5> result(channels, output_planes, output_rows, output_cols, + num_batches); + input = input.constant(11.0f) + input.random(); + result.setRandom(); + result = result.constant(-1000.0f); + + // 
Max pooling using a 4x3x2 window and a stride of 1. + const int stride = 1; + result = CuboidMaxPooling(input, patch_planes, patch_rows, patch_cols, stride, + stride, stride, PADDING_VALID); + + EXPECT_EQ(result.dimension(0), channels); + EXPECT_EQ(result.dimension(1), output_planes); + EXPECT_EQ(result.dimension(2), output_rows); + EXPECT_EQ(result.dimension(3), output_cols); + EXPECT_EQ(result.dimension(4), num_batches); + + for (int b = 0; b < num_batches; ++b) { + for (int d = 0; d < channels; ++d) { + for (int i = 0; i < output_planes; ++i) { + for (int j = 0; j < output_rows; ++j) { + for (int k = 0; k < output_cols; ++k) { + float expected = -10000.f; + for (int p = 0; p < patch_planes; ++p) { + for (int r = 0; r < patch_rows; ++r) { + for (int c = 0; c < patch_cols; ++c) { + expected = + (std::max)(expected, input(d, p + i, r + j, c + k, b)); + } + } + } + if (result(d, i, j, k, b) != expected) { + std::cout << "at d=" << d << " b=" << b << " i=" << i + << " j=" << j << " k=" << k << " " + << result(d, i, j, k, b) << " vs " << expected + << std::endl; + } + EigenApprox(result(d, i, j, k, b), expected); + } + } + } + } + } +} + +TEST(EigenPoolingTest, CuboidRowMajor) { + const int channels = 10; + const int input_planes = 5; + const int input_rows = 5; + const int input_cols = 5; + const int num_batches = 13; + const int patch_rows = 4; + const int patch_cols = 3; + const int patch_planes = 2; + const int output_rows = 2; + const int output_cols = 3; + const int output_planes = 4; + + Tensor<float, 5, RowMajor> input(num_batches, input_cols, input_rows, + input_planes, channels); + Tensor<float, 5, RowMajor> result(num_batches, output_cols, output_rows, + output_planes, channels); + input = input.constant(11.0f) + input.random(); + result.setRandom(); + result = result.constant(-1000.0f); + + // Max pooling using a 4x3x2 window and a stride of 1. 
+ const int stride = 1; + result = CuboidMaxPooling(input, patch_planes, patch_rows, patch_cols, stride, + stride, stride, PADDING_VALID); + + EXPECT_EQ(result.dimension(4), channels); + EXPECT_EQ(result.dimension(3), output_planes); + EXPECT_EQ(result.dimension(2), output_rows); + EXPECT_EQ(result.dimension(1), output_cols); + EXPECT_EQ(result.dimension(0), num_batches); + + for (int b = 0; b < num_batches; ++b) { + for (int d = 0; d < channels; ++d) { + for (int i = 0; i < output_planes; ++i) { + for (int j = 0; j < output_rows; ++j) { + for (int k = 0; k < output_cols; ++k) { + float expected = -10000.f; + for (int p = 0; p < patch_planes; ++p) { + for (int r = 0; r < patch_rows; ++r) { + for (int c = 0; c < patch_cols; ++c) { + expected = + (std::max)(expected, input(b, c + k, r + j, p + i, d)); + } + } + } + if (result(b, k, j, i, d) != expected) { + std::cout << "at d=" << d << " b=" << b << " i=" << i + << " j=" << j << " k=" << k << " " + << result(b, k, j, i, d) << " vs " << expected + << std::endl; + } + EigenApprox(result(b, k, j, i, d), expected); + } + } + } + } + } +} + +TEST(EigenPoolingTest, ValidCuboid) { + const int channels = 10; + const int input_planes = 5; + const int input_rows = 5; + const int input_cols = 5; + const int num_batches = 13; + const int patch_rows = 4; + const int patch_cols = 3; + const int patch_planes = 2; + const int output_rows = 2; + const int output_cols = 3; + const int output_planes = 4; + + Tensor<float, 5> input(channels, input_planes, input_rows, input_cols, + num_batches); + Tensor<float, 5> result(channels, output_planes, output_rows, output_cols, + num_batches); + input = input.constant(11.0f) + input.random(); + result.setRandom(); + result = result.constant(-1000.0f); + + // Avg pooling using a 4x3x2 window and a stride of 1. 
+ const int stride = 1; + result = CuboidAvgPooling(input, patch_planes, patch_rows, patch_cols, stride, + stride, stride, PADDING_VALID); + + EXPECT_EQ(result.dimension(0), channels); + EXPECT_EQ(result.dimension(1), output_planes); + EXPECT_EQ(result.dimension(2), output_rows); + EXPECT_EQ(result.dimension(3), output_cols); + EXPECT_EQ(result.dimension(4), num_batches); + + for (int b = 0; b < num_batches; ++b) { + for (int d = 0; d < channels; ++d) { + for (int i = 0; i < output_planes; ++i) { + for (int j = 0; j < output_rows; ++j) { + for (int k = 0; k < output_cols; ++k) { + float expected_sum = 0.0f; + int expected_count = 0; + for (int p = 0; p < patch_planes; ++p) { + for (int r = 0; r < patch_rows; ++r) { + for (int c = 0; c < patch_cols; ++c) { + expected_sum += input(d, p + i, r + j, c + k, b); + expected_count++; + } + } + } + const float expected = expected_sum / expected_count; + if (result(d, i, j, k, b) != expected) { + std::cout << "at d=" << d << " b=" << b << " i=" << i + << " j=" << j << " k=" << k << " " + << result(d, i, j, k, b) << " vs " << expected + << std::endl; + } + EigenApprox(result(d, i, j, k, b), expected); + } + } + } + } + } +} + +TEST(EigenPoolingTest, ValidCuboidRowMajor) { + const int channels = 10; + const int input_planes = 5; + const int input_rows = 5; + const int input_cols = 5; + const int num_batches = 13; + const int patch_rows = 4; + const int patch_cols = 3; + const int patch_planes = 2; + const int output_rows = 2; + const int output_cols = 3; + const int output_planes = 4; + + Tensor<float, 5, RowMajor> input(num_batches, input_cols, input_rows, + input_planes, channels); + Tensor<float, 5, RowMajor> result(num_batches, output_cols, output_rows, + output_planes, channels); + input = input.constant(11.0f) + input.random(); + result.setRandom(); + result = result.constant(-1000.0f); + + // Avg pooling using a 4x3x2 window and a stride of 1. 
+ const int stride = 1; + result = CuboidAvgPooling(input, patch_planes, patch_rows, patch_cols, stride, + stride, stride, PADDING_VALID); + + EXPECT_EQ(result.dimension(4), channels); + EXPECT_EQ(result.dimension(3), output_planes); + EXPECT_EQ(result.dimension(2), output_rows); + EXPECT_EQ(result.dimension(1), output_cols); + EXPECT_EQ(result.dimension(0), num_batches); + + for (int b = 0; b < num_batches; ++b) { + for (int d = 0; d < channels; ++d) { + for (int i = 0; i < output_planes; ++i) { + for (int j = 0; j < output_rows; ++j) { + for (int k = 0; k < output_cols; ++k) { + float expected_sum = 0.0f; + int expected_count = 0; + for (int p = 0; p < patch_planes; ++p) { + for (int r = 0; r < patch_rows; ++r) { + for (int c = 0; c < patch_cols; ++c) { + expected_sum += input(b, c + k, r + j, p + i, d); + expected_count++; + } + } + } + const float expected = expected_sum / expected_count; + if (result(b, k, j, i, d) != expected) { + std::cout << "at d=" << d << " b=" << b << " i=" << i + << " j=" << j << " k=" << k << " " + << result(b, k, j, i, d) << " vs " << expected + << std::endl; + } + EigenApprox(result(b, k, j, i, d), expected); + } + } + } + } + } +} + +TEST(EigenPoolingTest, SameCuboid) { + const int channels = 10; + const int input_planes = 5; + const int input_rows = 5; + const int input_cols = 5; + const int num_batches = 13; + const int patch_rows = 4; + const int patch_cols = 3; + const int patch_planes = 2; + const int output_rows = input_rows; + const int output_cols = input_cols; + const int output_planes = input_planes; + + Tensor<float, 5> input(channels, input_planes, input_rows, input_cols, + num_batches); + Tensor<float, 5> result(channels, output_planes, output_rows, output_cols, + num_batches); + input = input.constant(11.0f) + input.random(); + result.setRandom(); + result = result.constant(-1000.0f); + + // Avg pooling using a 4x3x2 window and a stride of 1. 
+ const int stride = 1; + result = CuboidAvgPooling(input, patch_planes, patch_rows, patch_cols, stride, + stride, stride, PADDING_SAME); + + EXPECT_EQ(result.dimension(0), channels); + EXPECT_EQ(result.dimension(1), output_planes); + EXPECT_EQ(result.dimension(2), output_rows); + EXPECT_EQ(result.dimension(3), output_cols); + EXPECT_EQ(result.dimension(4), num_batches); + + const int pad_p = output_planes - input_planes + patch_planes - 1; + const int pad_r = output_rows - input_rows + patch_rows - 1; + const int pad_c = output_cols - input_cols + patch_cols - 1; + + // Number of pixels the input is extended with at the lower end in every + // dimension. + const int dp = pad_p - pad_p / 2; + const int dr = pad_r - pad_r / 2; + const int dc = pad_c - pad_c / 2; + + for (int b = 0; b < num_batches; ++b) { + for (int d = 0; d < channels; ++d) { + for (int i = 0; i < output_planes; ++i) { + for (int j = 0; j < output_rows; ++j) { + for (int k = 0; k < output_cols; ++k) { + float expected_sum = 0.0f; + int expected_count = 0; + for (int p = 0; p < patch_planes; ++p) { + for (int r = 0; r < patch_rows; ++r) { + for (int c = 0; c < patch_cols; ++c) { + const int in_p = p + i - dp; + const int in_r = r + j - dr; + const int in_c = c + k - dc; + if (in_p >= 0 && in_p < input_planes && in_r >= 0 && + in_r < input_rows && in_c >= 0 && in_c < input_cols) { + expected_sum += input(d, in_p, in_r, in_c, b); + expected_count++; + } + } + } + } + const float expected = expected_sum / expected_count; + if (result(d, i, j, k, b) != expected) { + std::cout << "at d=" << d << " b=" << b << " i=" << i + << " j=" << j << " k=" << k << " " + << result(d, i, j, k, b) << " vs " << expected + << std::endl; + } + EigenApprox(result(d, i, j, k, b), expected); + } + } + } + } + } +} + +TEST(EigenPoolingTest, SameCuboidRowMajor) { + const int channels = 10; + const int input_planes = 5; + const int input_rows = 5; + const int input_cols = 5; + const int num_batches = 13; + const int patch_rows 
= 4; + const int patch_cols = 3; + const int patch_planes = 2; + const int output_rows = input_rows; + const int output_cols = input_cols; + const int output_planes = input_planes; + + Tensor<float, 5, RowMajor> input(num_batches, input_cols, input_rows, + input_planes, channels); + Tensor<float, 5, RowMajor> result(num_batches, output_cols, output_rows, + output_planes, channels); + input = input.constant(11.0f) + input.random(); + result.setRandom(); + result = result.constant(-1000.0f); + + // Avg pooling using a 4x3x2 window and a stride of 1. + const int stride = 1; + result = CuboidAvgPooling(input, patch_planes, patch_rows, patch_cols, stride, + stride, stride, PADDING_SAME); + + EXPECT_EQ(result.dimension(4), channels); + EXPECT_EQ(result.dimension(3), output_planes); + EXPECT_EQ(result.dimension(2), output_rows); + EXPECT_EQ(result.dimension(1), output_cols); + EXPECT_EQ(result.dimension(0), num_batches); + + const int pad_p = output_planes - input_planes + patch_planes - 1; + const int pad_r = output_rows - input_rows + patch_rows - 1; + const int pad_c = output_cols - input_cols + patch_cols - 1; + + // Number of pixels the input is extended with at the lower end in every + // dimension. 
+ const int dp = pad_p - pad_p / 2; + const int dr = pad_r - pad_r / 2; + const int dc = pad_c - pad_c / 2; + + for (int b = 0; b < num_batches; ++b) { + for (int d = 0; d < channels; ++d) { + for (int i = 0; i < output_planes; ++i) { + for (int j = 0; j < output_rows; ++j) { + for (int k = 0; k < output_cols; ++k) { + float expected_sum = 0.0f; + int expected_count = 0; + for (int p = 0; p < patch_planes; ++p) { + for (int r = 0; r < patch_rows; ++r) { + for (int c = 0; c < patch_cols; ++c) { + const int in_p = p + i - dp; + const int in_r = r + j - dr; + const int in_c = c + k - dc; + if (in_p >= 0 && in_p < input_planes && in_r >= 0 && + in_r < input_rows && in_c >= 0 && in_c < input_cols) { + expected_sum += input(b, in_c, in_r, in_p, d); + expected_count++; + } + } + } + } + const float expected = expected_sum / expected_count; + if (result(b, k, j, i, d) != expected) { + std::cout << "at d=" << d << " b=" << b << " i=" << i + << " j=" << j << " k=" << k << " " + << result(b, k, j, i, d) << " vs " << expected + << std::endl; + } + EigenApprox(result(b, k, j, i, d), expected); + } + } + } + } + } +} + +// Col-major strided max pooling. Registered with TEST so that it actually runs; +// as a plain static function it was never invoked. +TEST(EigenPoolingTest, Strided) { + const int depth = 10; + const int input_rows = 5; + const int input_cols = 5; + const int num_batches = 13; + const int patch_rows = 3; + const int patch_cols = 3; + const int output_rows = 2; + const int output_cols = 2; + + Tensor<float, 4> input(depth, input_rows, input_cols, num_batches); + Tensor<float, 4> result(depth, output_rows, output_cols, num_batches); + input = input.constant(11.0f) + input.random(); + result.setRandom(); + + // Max pooling using a 3x3 window and a stride of 2. 
+ int stride = 2; + result = SpatialMaxPooling(input, patch_rows, patch_cols, stride, stride, + PADDING_VALID); + + EXPECT_EQ(result.dimension(0), depth); + EXPECT_EQ(result.dimension(1), output_rows); + EXPECT_EQ(result.dimension(2), output_cols); + EXPECT_EQ(result.dimension(3), num_batches); + + for (int b = 0; b < num_batches; ++b) { + for (int d = 0; d < depth; ++d) { + for (int i = 0; i < output_rows; ++i) { + for (int j = 0; j < output_cols; ++j) { + float expected = -10000.f; + for (int r = 0; r < patch_rows; ++r) { + for (int c = 0; c < patch_cols; ++c) { + expected = (std::max)( + expected, input(d, r + stride * i, c + stride * j, b)); + } + } + if (result(d, i, j, b) != expected) { + std::cout << "at d=" << d << " b=" << b << " i=" << i << " j=" << j + << " " << result(d, i, j, b) << " vs " << expected + << std::endl; + } + EigenApprox(result(d, i, j, b), expected); + } + } + } + } +} + +// Row-major counterpart of the Strided test (dimension order reversed). +TEST(EigenPoolingTest, StridedRowMajor) { + const int depth = 10; + const int input_rows = 5; + const int input_cols = 5; + const int num_batches = 13; + const int patch_rows = 3; + const int patch_cols = 3; + const int output_rows = 2; + const int output_cols = 2; + + Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows, depth); + Tensor<float, 4, RowMajor> result(num_batches, output_cols, output_rows, + depth); + input = input.constant(11.0f) + input.random(); + result.setRandom(); + + // Max pooling using a 3x3 window and a stride of 2. 
+ int stride = 2; + result = SpatialMaxPooling(input, patch_rows, patch_cols, stride, stride, + PADDING_VALID); + + EXPECT_EQ(result.dimension(3), depth); + EXPECT_EQ(result.dimension(2), output_rows); + EXPECT_EQ(result.dimension(1), output_cols); + EXPECT_EQ(result.dimension(0), num_batches); + + for (int b = 0; b < num_batches; ++b) { + for (int d = 0; d < depth; ++d) { + for (int i = 0; i < output_rows; ++i) { + for (int j = 0; j < output_cols; ++j) { + float expected = -10000.f; + for (int r = 0; r < patch_rows; ++r) { + for (int c = 0; c < patch_cols; ++c) { + expected = (std::max)( + expected, input(b, c + stride * j, r + stride * i, d)); + } + } + if (result(b, j, i, d) != expected) { + std::cout << "at d=" << d << " b=" << b << " i=" << i << " j=" << j + << " " << result(b, j, i, d) << " vs " << expected + << std::endl; + } + EigenApprox(result(b, j, i, d), expected); + } + } + } + } +} + +TEST(EigenPoolingTest, StridedCuboid) { + const int channels = 10; + const int input_planes = 5; + const int input_rows = 5; + const int input_cols = 5; + const int num_batches = 13; + const int patch_planes = 3; + const int patch_rows = 3; + const int patch_cols = 3; + const int output_planes = 2; + const int output_rows = 2; + const int output_cols = 2; + + Tensor<float, 5> input(channels, input_planes, input_rows, input_cols, + num_batches); + Tensor<float, 5> result(channels, output_planes, output_rows, output_cols, + num_batches); + input = input.constant(11.0f) + input.random(); + result.setRandom(); + + // Max pooling using a 3x3x3 window and a stride of 2. 
+ int stride = 2; + result = CuboidMaxPooling(input, patch_planes, patch_rows, patch_cols, stride, + stride, stride, PADDING_VALID); + + EXPECT_EQ(result.dimension(0), channels); + EXPECT_EQ(result.dimension(1), output_planes); + EXPECT_EQ(result.dimension(2), output_rows); + EXPECT_EQ(result.dimension(3), output_cols); + EXPECT_EQ(result.dimension(4), num_batches); + + for (int b = 0; b < num_batches; ++b) { + for (int d = 0; d < channels; ++d) { + for (int i = 0; i < output_planes; ++i) { + for (int j = 0; j < output_rows; ++j) { + for (int k = 0; k < output_cols; ++k) { + float expected = -10000.f; + for (int p = 0; p < patch_planes; ++p) { + for (int r = 0; r < patch_rows; ++r) { + for (int c = 0; c < patch_cols; ++c) { + expected = (std::max)(expected, + input(d, p + stride * i, r + stride * j, + c + stride * k, b)); + } + } + } + if (result(d, i, j, k, b) != expected) { + // Label every index, k included, so a failure pinpoints the element. + std::cout << "at d=" << d << " b=" << b << " i=" << i + << " j=" << j << " k=" << k << " " + << result(d, i, j, k, b) << " vs " << expected + << std::endl; + } + EigenApprox(result(d, i, j, k, b), expected); + } + } + } + } + } +} + +TEST(EigenPoolingTest, StridedCuboidRowMajor) { + const int channels = 10; + const int input_planes = 5; + const int input_rows = 5; + const int input_cols = 5; + const int num_batches = 13; + const int patch_planes = 3; + const int patch_rows = 3; + const int patch_cols = 3; + const int output_planes = 2; + const int output_rows = 2; + const int output_cols = 2; + + Tensor<float, 5, RowMajor> input(num_batches, input_cols, input_rows, + input_planes, channels); + Tensor<float, 5, RowMajor> result(num_batches, output_cols, output_rows, + output_planes, channels); + input = input.constant(11.0f) + input.random(); + result.setRandom(); + + // Max pooling using a 3x3x3 window and a stride of 2. 
+ int stride = 2; + result = CuboidMaxPooling(input, patch_planes, patch_rows, patch_cols, stride, + stride, stride, PADDING_VALID); + + EXPECT_EQ(result.dimension(4), channels); + EXPECT_EQ(result.dimension(3), output_planes); + EXPECT_EQ(result.dimension(2), output_rows); + EXPECT_EQ(result.dimension(1), output_cols); + EXPECT_EQ(result.dimension(0), num_batches); + + for (int b = 0; b < num_batches; ++b) { + for (int d = 0; d < channels; ++d) { + for (int i = 0; i < output_planes; ++i) { + for (int j = 0; j < output_rows; ++j) { + for (int k = 0; k < output_cols; ++k) { + float expected = -10000.f; + for (int p = 0; p < patch_planes; ++p) { + for (int r = 0; r < patch_rows; ++r) { + for (int c = 0; c < patch_cols; ++c) { + expected = (std::max)(expected, + input(b, c + stride * k, r + stride * j, + p + stride * i, d)); + } + } + } + if (result(b, k, j, i, d) != expected) { + // Label every index, k included, so a failure pinpoints the element. + std::cout << "at d=" << d << " b=" << b << " i=" << i + << " j=" << j << " k=" << k << " " + << result(b, k, j, i, d) << " vs " << expected + << std::endl; + } + EigenApprox(result(b, k, j, i, d), expected); + } + } + } + } + } +} + +} // namespace Eigen diff --git a/tensorflow/core/kernels/eigen_softmax.h b/tensorflow/core/kernels/eigen_softmax.h new file mode 100644 index 0000000000..49123e8062 --- /dev/null +++ b/tensorflow/core/kernels/eigen_softmax.h @@ -0,0 +1,90 @@ +/* Copyright 2015 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_SOFTMAX_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_SOFTMAX_H_ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace Eigen { + +/** SoftMax + * \ingroup CXX11_NeuralNetworks_Module + * + * \brief Applies a softmax + * + * The input parameter is expected to be a col-major tensor with a rank of 2 (depth and other). + * + * The result can be assigned to a tensor of rank and dimensions equal to that of the input. The result will be laid out in col-major order. + * +*/ + +namespace { +struct SoftmaxOp { + SoftmaxOp(const float beta) : beta_(beta) { } + + template <typename Input> + typename Input::Dimensions dimensions(const Input& input) const { + return input.dimensions(); + } + + template <typename Input, typename Output, typename Device> + void eval(const Input& input, Output& output, const Device& device) const + { +#if !defined(EIGEN_HAS_INDEX_LIST) + // nvcc doesn't support cxx11 + Eigen::array<typename internal::traits<Input>::Index, 1> depth_dim; + depth_dim[0] = 0; + Eigen::array<typename internal::traits<Input>::Index, 2> bcast; + bcast[0] = dimensions(input)[0]; + bcast[1] = 1; + DSizes<typename internal::traits<Input>::Index, 2> dims2d; + dims2d[0] = 1; + dims2d[1] = dimensions(input)[1]; +#else + // Take advantage of cxx11 to give the compiler information it can use to + // optimize the code. 
+ Eigen::IndexList<Eigen::type2index<0>> depth_dim; + Eigen::IndexList<int, Eigen::type2index<1>> bcast; + bcast.set(0, dimensions(input)[0]); + Eigen::IndexList<Eigen::type2index<1>, typename internal::traits<Input>::Index> dims2d; + dims2d.set(1, dimensions(input)[1]); +#endif + + output.device(device) = ((input - input.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast)) * beta_).exp(); + output.device(device) = output / (output.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast)); + } + + private: + const float beta_; +}; +} + + +template <typename Input> +EIGEN_ALWAYS_INLINE +static const TensorCustomUnaryOp<const SoftmaxOp, const Input> +SoftMax(const Input& input, const float beta) +{ + EIGEN_STATIC_ASSERT(internal::traits<Input>::Layout == ColMajor, YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 2, YOU_MADE_A_PROGRAMMING_MISTAKE); + + const SoftmaxOp op(beta); + return input.customOp(op); +} + +} // end namespace Eigen + +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_SOFTMAX_H_ diff --git a/tensorflow/core/kernels/eigen_softmax_test.cc b/tensorflow/core/kernels/eigen_softmax_test.cc new file mode 100644 index 0000000000..8623861518 --- /dev/null +++ b/tensorflow/core/kernels/eigen_softmax_test.cc @@ -0,0 +1,65 @@ +/* Copyright 2015 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/kernels/eigen_softmax.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/platform/test.h" + +namespace Eigen { + +namespace { +void EigenApprox(float a, float b) { + ASSERT_TRUE(std::abs(a - b) <= std::min(std::abs(a), std::abs(b)) * 1e-3); +} +} + +TEST(EigenSoftmaxTest, Simple) { + const int depth = 1024; + const int batch = 32; + const float beta = 1.2f; + + Tensor<float, 2> input(depth, batch); + input = input.constant(11.0f) + input.random(); + + Tensor<float, 2> reference(depth, batch); + reference.setRandom(); + + Eigen::array<int, 1> depth_dim; + depth_dim[0] = 0; + Eigen::array<int, 2> bcast; + bcast[0] = depth; + bcast[1] = 1; + Tensor<float, 2>::Dimensions dims2d; + dims2d[0] = 1; + dims2d[1] = batch; + reference = + ((input - + input.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast)) * + beta) + .exp(); + reference = + reference / + (reference.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast)); + + Tensor<float, 2> result = SoftMax(input, beta); + + for (int i = 0; i < depth; ++i) { + for (int j = 0; j < batch; ++j) { + EigenApprox(result(i, j), reference(i, j)); + } + } +} + +} // namespace Eigen diff --git a/tensorflow/core/kernels/eigen_spatial_convolutions.h b/tensorflow/core/kernels/eigen_spatial_convolutions.h new file mode 100644 index 0000000000..53a3e99b19 --- /dev/null +++ b/tensorflow/core/kernels/eigen_spatial_convolutions.h @@ -0,0 +1,785 @@ +/* Copyright 2015 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_SPATIAL_CONVOLUTIONS_H_ +#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_SPATIAL_CONVOLUTIONS_H_ + +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace Eigen { + +namespace internal { + +// These optimizations require vector instructions +#ifdef EIGEN_VECTORIZE + +// TODO: Consolidate this part of the code with the image patch extraction code +// since they are both very similar. +template <typename NewDimension, DenseIndex Rows, DenseIndex Cols, typename ArgType, typename Device, + typename Scalar_, typename Index, + typename nocontract_t, typename contract_t, + int Side, size_t packet_size, + bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment> +class TensorContractionInputMapper<Scalar_, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> +{ + public: + typedef Scalar_ Scalar; + typedef TensorContractionInputMapper<Scalar, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Self; + typedef TensorContractionSubMapper<Scalar, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>, nocontract_t, contract_t, 
packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper; + typedef SubMapper VectorMapper; + typedef SubMapper LinearMapper; + typedef typename packet_traits<Scalar>::type Packet; + + TensorContractionInputMapper(const TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>& tensor, + const nocontract_t&, const nocontract_t&, + const contract_t&, const contract_t&) + : m_impl(tensor.impl().impl()) + { + Index patch_rows; + Index patch_depth; + if (internal::traits<ArgType>::Layout == ColMajor) { + patch_depth = tensor.impl().dimensions()[0]; + patch_rows = tensor.impl().dimensions()[1]; + m_patch_cols = tensor.impl().dimensions()[2]; + m_num_patches = tensor.impl().dimensions()[3]; + } else { + static const int NumDims = tensor.impl().dimensions().size(); + patch_depth = tensor.impl().dimensions()[NumDims - 1]; + patch_rows = tensor.impl().dimensions()[NumDims - 2]; + m_patch_cols = tensor.impl().dimensions()[NumDims - 3]; + m_num_patches = tensor.impl().dimensions()[NumDims - 4]; + } + m_patch_row_inflate_strides = tensor.impl().rowInflateStride(); + m_patch_col_inflate_strides = tensor.impl().colInflateStride(); + + m_colStride = patch_rows; + + m_outputRows = tensor.impl().outputRows(); + m_row_strides = tensor.impl().userRowStride(); + m_col_strides = tensor.impl().userColStride(); + + m_in_row_strides = tensor.impl().userInRowStride(); + m_in_col_strides = tensor.impl().userInColStride(); + + if (internal::traits<ArgType>::Layout == ColMajor) { + m_inputRows = tensor.impl().impl().dimensions()[1]; + m_inputCols = tensor.impl().impl().dimensions()[2]; + } else { + static const int NumDims = tensor.impl().impl().dimensions().size(); + m_inputRows = tensor.impl().impl().dimensions()[NumDims - 2]; + m_inputCols = tensor.impl().impl().dimensions()[NumDims - 3]; + } + + m_rowInputStride = patch_depth; + m_colInputStride = patch_depth * m_inputRows; + m_patchInputStride = patch_depth * 
m_inputRows * m_inputCols; + + m_rowPaddingTop = tensor.impl().rowPaddingTop(); + m_colPaddingLeft = tensor.impl().colPaddingLeft(); + + m_fastInputRowStride = internal::TensorIntDivisor<Index>(m_patch_row_inflate_strides); + m_fastInputColStride = internal::TensorIntDivisor<Index>(m_patch_col_inflate_strides); + m_fastNumPatches = internal::TensorIntDivisor<Index>(m_num_patches); + m_fastColStride = internal::TensorIntDivisor<Index>(m_colStride); + m_fastOutputRows = internal::TensorIntDivisor<Index>(m_outputRows); + m_fastDimZero = internal::TensorIntDivisor<Index>(patch_depth); + } + + TensorContractionInputMapper(const TensorContractionInputMapper& base_mapper) : + m_impl(base_mapper.m_impl) { + m_patch_cols = base_mapper.m_patch_cols; + m_num_patches = base_mapper.m_num_patches; + m_patch_row_inflate_strides = base_mapper.m_patch_row_inflate_strides; + m_patch_col_inflate_strides = base_mapper.m_patch_col_inflate_strides; + + m_colStride = base_mapper.m_colStride; + + m_rowInputStride = base_mapper.m_rowInputStride; + m_colInputStride = base_mapper.m_colInputStride; + m_patchInputStride = base_mapper.m_patchInputStride; + + m_inputRows = base_mapper.m_inputRows; + m_inputCols = base_mapper.m_inputCols; + + m_outputRows = base_mapper.m_outputRows; + m_row_strides = base_mapper.m_row_strides; + m_col_strides = base_mapper.m_col_strides; + + m_in_row_strides = base_mapper.m_in_row_strides; + m_in_col_strides = base_mapper.m_in_col_strides; + + m_rowPaddingTop = base_mapper.m_rowPaddingTop; + m_colPaddingLeft = base_mapper.m_colPaddingLeft; + + m_fastInputRowStride = base_mapper.m_fastInputRowStride; + m_fastInputColStride = base_mapper.m_fastInputColStride; + m_fastNumPatches = base_mapper.m_fastNumPatches; + m_fastColStride = base_mapper.m_fastColStride; + m_fastOutputRows = base_mapper.m_fastOutputRows; + m_fastDimZero = base_mapper.m_fastDimZero; + } + + // If true, turns off some optimizations for loading packets since the image + // patches are 
"non-standard" such as there are non-trivial strides or + // inflations in the input. + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE bool nonStandardPatches() const { + return m_in_row_strides != 1 || m_in_col_strides != 1 || m_patch_row_inflate_strides != 1 || m_patch_col_inflate_strides != 1; + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const { + return SubMapper(*this, i, j); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE LinearMapper getLinearMapper(Index i, Index j) const { + return LinearMapper(*this, i, j); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Scalar operator()(Index row) const { + Index rowIndex, colIndex, otherIndex; + computeBaseIndices(0, rowIndex, colIndex, otherIndex); + return loadCoeff(row, rowIndex, colIndex, otherIndex); + } + + // Load the coefficient at the patchIndex location instead of the usual m_rowIndex, + // m_colIndex, m_otherIndex. This is currently only used by the gpu code. EIGEN_DEVICE_FUNC + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar operator()(Index row, Index patchIndex) const { + Index rowIndex, colIndex, otherIndex; + computeBaseIndices(patchIndex, rowIndex, colIndex, otherIndex); + return loadCoeff(row, rowIndex, colIndex, otherIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet loadPacket(Index row) const { + Index rowIndex, colIndex, otherIndex; + computeBaseIndices(0, rowIndex, colIndex, otherIndex); + return loadPacket(row, rowIndex, colIndex, otherIndex); + } + + // Load the packet at the patchIndex location instead of the usual m_rowIndex, + // m_colIndex, m_otherIndex. This is currently only used by the gpu code. 
+ EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet loadPacket(Index row, Index patchIndex) const { + Index rowIndex, colIndex, otherIndex; + computeBaseIndices(patchIndex, rowIndex, colIndex, otherIndex); + return loadPacket(row, rowIndex, colIndex, otherIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchDepth() const { return m_rowInputStride; } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchRows() const { return m_colStride; } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchCols() const { return m_patch_cols; } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet packetNoPadding(const Index depth, const Index baseIndex) const { + const Index inputIndex = depth + baseIndex; + return m_impl.template packet<Unaligned>(inputIndex); + } + + private: + friend class TensorContractionSubMapper<Scalar, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>; + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar loadCoeff(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const { + // Find the offset of the element wrt the location of the first element. + const Index patchOffset = patchId / m_fastDimZero; + + const Index colOffset = patchOffset / m_fastColStride; + const Index inputCol = colIndex + colOffset * m_in_col_strides; + const Index origInputCol = (m_patch_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0); + const Index rowOffset = patchOffset - colOffset * m_colStride; + const Index inputRow = rowIndex + rowOffset * m_in_row_strides; + const Index origInputRow = (m_patch_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? 
(inputRow / m_fastInputRowStride) : 0); + if (origInputCol < 0 || origInputRow < 0 || origInputCol >= m_inputCols || + origInputRow >= m_inputRows || + (inputCol != origInputCol * m_patch_col_inflate_strides) || + (inputRow != origInputRow * m_patch_row_inflate_strides)) { + return Scalar(0); + } + const Index depth = patchId - patchOffset * patchDepth(); + const Index inputIndex = depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex; + return m_impl.coeff(inputIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE Scalar loadCoeffStandard(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const { + eigen_assert(!nonStandardPatches()); + + // Find the offset of the element wrt the location of the first element. + const Index patchOffset = patchId / m_fastDimZero; + + const Index colOffset = patchOffset / m_fastColStride; + const Index inputCol = colIndex + colOffset; + const Index rowOffset = patchOffset - colOffset * m_colStride; + const Index inputRow = rowIndex + rowOffset; + if (inputCol < 0 || inputCol >= m_inputCols || inputRow < 0 || inputRow >= m_inputRows) { + return Scalar(0); + } + const Index depth = patchId - patchOffset * patchDepth(); + const Index inputIndex = depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex; + return m_impl.coeff(inputIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet loadPacket(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const { + const Index packetSize = internal::unpacket_traits<Packet>::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(patchId < patchDepth()*patchRows()*m_patch_cols); + + if (nonStandardPatches()) { + return packetWithPossibleZero(patchId, rowIndex, colIndex, otherIndex); + } + return loadPacketStandard(patchId, rowIndex, colIndex, otherIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet loadPacketStandard(Index patchId, Index rowIndex, Index 
colIndex, Index otherIndex) const { + const Index packetSize = internal::unpacket_traits<Packet>::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(patchId < patchDepth()*patchRows()*m_patch_cols); + + eigen_assert(!nonStandardPatches()); + + if ((patchDepth() % packetSize) == 0) { + return loadPacketFast(patchId, rowIndex, colIndex, otherIndex); + } + else { + const Index patchOffsets[2] = {patchId / m_fastDimZero, (patchId + packetSize - 1) / m_fastDimZero}; + + const Index colOffsets[2] = {patchOffsets[0] / m_fastColStride, patchOffsets[1] / m_fastColStride}; + + const Index inputCols[2] = {colIndex + colOffsets[0], colIndex + colOffsets[1]}; + if (inputCols[0] >= m_inputCols || inputCols[1] < 0) { + // all zeros + return internal::pset1<Packet>(Scalar(0)); + } + + if (inputCols[0] == inputCols[1]) { + const Index rowOffsets[2] = {patchOffsets[0] - colOffsets[0]*m_colStride, patchOffsets[1] - colOffsets[1]*m_colStride}; + eigen_assert(rowOffsets[0] <= rowOffsets[1]); + const Index inputRows[2] = {rowIndex + rowOffsets[0], rowIndex + rowOffsets[1]}; + + if (inputRows[0] >= m_inputRows || inputRows[1] < 0) { + // all zeros + return internal::pset1<Packet>(Scalar(0)); + } + + if (inputRows[0] >= 0 && inputRows[1] < m_inputRows) { + // no padding + const Index depth = patchId - patchOffsets[0] * patchDepth(); + const Index inputIndex = depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex; + return m_impl.template packet<Unaligned>(inputIndex); + } + } + } + return packetWithPossibleZero(patchId, rowIndex, colIndex, otherIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet loadPacketFast(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const { + const Index packetSize = internal::unpacket_traits<Packet>::size; + EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) + eigen_assert(patchId < patchDepth()*patchRows()*m_patch_cols); + + 
eigen_assert(!nonStandardPatches()); + eigen_assert((patchDepth() % packetSize) == 0); + // Find the offset of the element wrt the location of the first element. + const Index patchOffset = patchId / m_fastDimZero; + eigen_assert((patchId + packetSize - 1) / m_fastDimZero == patchOffset); + + const Index colOffset = patchOffset / m_fastColStride; + const Index inputCol = colIndex + colOffset; + const Index rowOffset = patchOffset - colOffset*m_colStride; + const Index inputRow = rowIndex + rowOffset; + if (inputCol < 0 || inputRow < 0 || inputCol >= m_inputCols || + inputRow >= m_inputRows) { + // all zeros + return internal::pset1<Packet>(Scalar(0)); + } + // no padding + const Index depth = patchId - patchOffset * patchDepth(); + const Index inputIndex = depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex; + return m_impl.template packet<Unaligned>(inputIndex); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet packetWithPossibleZero(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const + { + const int packetSize = internal::unpacket_traits<Packet>::size; + EIGEN_ALIGN_MAX typename internal::remove_const<Scalar>::type values[packetSize]; + for (int i = 0; i < packetSize; ++i) { + values[i] = loadCoeff(patchId+i, rowIndex, colIndex, otherIndex); + } + Packet rslt = internal::pload<Packet>(values); + return rslt; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void computeBaseIndices(Index patchIndex, Index& rowIndex, Index& colIndex, Index& otherIndex) const { + const int NumInputDims = array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value; + otherIndex = (NumInputDims == 3) ? 0 : patchIndex / m_fastNumPatches; + const Index patch2DIndex = (NumInputDims == 3) ? 
patchIndex : (patchIndex - otherIndex * m_num_patches); + otherIndex *= m_patchInputStride; + colIndex = patch2DIndex / m_fastOutputRows; + rowIndex = patch2DIndex - colIndex * m_outputRows; + colIndex = colIndex * m_col_strides - m_colPaddingLeft; + rowIndex = rowIndex * m_row_strides - m_rowPaddingTop; + } + + Index m_patch_cols; // number of colums in the patch + Index m_num_patches; // number of patches to extract. + Index m_patch_row_inflate_strides; // the strides for row inflation in the image patch + Index m_patch_col_inflate_strides; // the strides for col inflation in the image patch + // Fast representation of inflation strides. + internal::TensorIntDivisor<Index> m_fastInputRowStride; + internal::TensorIntDivisor<Index> m_fastInputColStride; + + Index m_otherStride; + Index m_colStride; + internal::TensorIntDivisor<Index> m_fastNumPatches; + internal::TensorIntDivisor<Index> m_fastColStride; + + Index m_rowInputStride; // row stride in the input tensor + Index m_colInputStride; // col stride in the input tensor + Index m_patchInputStride; // patch stride in the input tensor + + Index m_inputRows; // Number of rows in the input tensor + Index m_inputCols; // Number of cols in the input tensor + + Index m_outputRows; // Number of patch rows + + Index m_row_strides; // User specified row stride + Index m_col_strides; // User specified col stride + + Index m_in_row_strides; // User specified input row stride + Index m_in_col_strides; // User specified input col stride + + Index m_rowPaddingTop; // Row padding + Index m_colPaddingLeft; // Column padding + + internal::TensorIntDivisor<Index> m_fastOutputRows; + internal::TensorIntDivisor<Index> m_fastDimZero; + + const TensorEvaluator<ArgType, Device> m_impl; +}; + + +template <typename NewDimension, DenseIndex Rows, DenseIndex Cols, typename ArgType, typename Device, + typename Scalar, typename Index, + typename nocontract_t, typename contract_t, + int Side, size_t packet_size, + bool inner_dim_contiguous, 
bool inner_dim_reordered, int Alignment> +class TensorContractionSubMapper<Scalar, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> +{ + public: + typedef typename packet_traits<Scalar>::type Packet; + typedef typename packet_traits<Scalar>::half HalfPacket; + + typedef TensorContractionInputMapper<Scalar, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> ParentMapper; + typedef TensorContractionSubMapper<Scalar, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Self; + typedef Self LinearMapper; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset) + : m_base_mapper(base_mapper), m_depth_offset(vert_offset), m_col_offset(horiz_offset) { + m_base_mapper.computeBaseIndices(m_col_offset, m_rowIndex, m_colIndex, m_otherIndex); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionSubMapper(const Self& base_mapper, Index vert_offset, Index horiz_offset) + : m_base_mapper(base_mapper.m_base_mapper), m_depth_offset(vert_offset+base_mapper.m_depth_offset), m_col_offset(horiz_offset+base_mapper.m_col_offset) { + m_base_mapper.computeBaseIndices(m_col_offset, m_rowIndex, m_colIndex, m_otherIndex); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { + return m_base_mapper.loadCoeff(i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const { + return m_base_mapper(i + 
m_depth_offset, j + m_col_offset); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { + return m_base_mapper.loadPacket(i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { + return m_base_mapper.template loadPacket(i + m_depth_offset, j + m_col_offset); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar loadCoeffStandard(Index i) const { + return m_base_mapper.loadCoeffStandard(i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex); + } + + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacketFast(Index i) const { + return m_base_mapper.loadPacketFast(i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex); + } + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacketStandard(Index i) const { + return m_base_mapper.loadPacketStandard(i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex); + } + template <typename Packet> + EIGEN_DEVICE_FUNC bool aligned(Index) const { + return false; + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE bool nonStandardPatches() const { + return m_base_mapper.nonStandardPatches(); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchDepth() const { return m_base_mapper.m_rowInputStride; } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchRows() const { return m_base_mapper.m_colStride; } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Index patchCols() const { return m_base_mapper.m_patch_cols; } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE Packet packetNoPadding(const Index depth, const Index baseIndex) const { + const Index inputIndex = depth + baseIndex; + return m_base_mapper.m_impl.template packet<Unaligned>(inputIndex); + } + + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE bool padRow(const Index row) const { + const Index r = m_rowIndex + row; + return r < 0 || r >= m_base_mapper.m_inputRows; + } + EIGEN_DEVICE_FUNC + EIGEN_ALWAYS_INLINE bool padCol(const Index col) const { + const Index c = 
m_colIndex + col;
    return c < 0 || c >= m_base_mapper.m_inputCols;
  }
  // Returns the linear input offset for the patch coordinate (row, col):
  // (m_rowIndex + row) and (m_colIndex + col) are scaled by the parent
  // mapper's row/column input strides and m_otherIndex is added.
  EIGEN_DEVICE_FUNC
  EIGEN_ALWAYS_INLINE Index baseIndex(const Index row, const Index col) const {
    const Index r = m_rowIndex + row;
    const Index c = m_colIndex + col;
    return r * m_base_mapper.m_rowInputStride + c * m_base_mapper.m_colInputStride + m_otherIndex;
  }

  // Derives a patch offset from m_depth_offset (divided by the parent's
  // m_fastDimZero) and removes the whole columns it contains
  // (colOffset * m_colStride).
  // NOTE(review): presumably the row inside the patch at which this
  // sub-mapper starts -- confirm against the ParentMapper definition.
  EIGEN_DEVICE_FUNC
  EIGEN_ALWAYS_INLINE Index rowOffset() const {
    const Index patchOffset = m_depth_offset / m_base_mapper.m_fastDimZero;
    const Index colOffset = patchOffset / m_base_mapper.m_fastColStride;
    return patchOffset-colOffset*m_base_mapper.m_colStride;
  }

  // Same decomposition as rowOffset(), but returns the quotient by
  // m_fastColStride.
  // NOTE(review): presumably the patch column at which this sub-mapper
  // starts -- confirm against the ParentMapper definition.
  EIGEN_DEVICE_FUNC
  EIGEN_ALWAYS_INLINE Index colOffset() const {
    const Index patchOffset = m_depth_offset / m_base_mapper.m_fastDimZero;
    const Index colOffset = patchOffset / m_base_mapper.m_fastColStride;
    return colOffset;
  }

  // Returns m_depth_offset modulo the parent mapper's patchDepth().
  EIGEN_DEVICE_FUNC
  EIGEN_ALWAYS_INLINE Index depthOffset() const {
    const Index patchOffset = m_depth_offset % m_base_mapper.patchDepth();
    return patchOffset;
  }

  // Builds a LinearMapper on the same parent mapper whose offsets are this
  // mapper's offsets shifted by (i, j).
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
    return LinearMapper(m_base_mapper, i + m_depth_offset, j + m_col_offset);
  }

 private:
  const ParentMapper& m_base_mapper;  // that was a reference before
  Index m_depth_offset;  // First row in the input matrix
  Index m_col_offset;  // First col in the input matrix

  Index m_rowIndex; // precomputed row index corresponding to the col offset
  Index m_colIndex; // precomputed col index corresponding to the col offset
  Index m_otherIndex; // precomputed other index corresponding to the col offset
};


// Specialization of gemm_pack_rhs for a right-hand side that is a reshaped
// TensorImagePatchOp seen through a TensorContractionSubMapper (ColMajor).
// The packing routine requires nr == 4 (statically asserted below) and writes
// the packed coefficients sequentially into `block`.
template <typename NewDimension, DenseIndex Rows, DenseIndex Cols, typename ArgType, typename Device,
          typename Scalar, typename Index,
          typename nocontract_t, typename contract_t,
          int Side, size_t packet_size,
          bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment, int nr>
struct gemm_pack_rhs<Scalar, Index,
                     TensorContractionSubMapper<Scalar, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>, nr, ColMajor, false, false> {

  typedef TensorContractionSubMapper<Scalar, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper;
  typedef SubMapper DataMapper;

  // Integer division of a by b, rounded up (for the non-negative operands
  // used below).
  static inline Index ceil_div(Index a, Index b) {
    return (a + b - 1) / b;
  }

  // Packs `depth` x `cols` coefficients read through `rhs` into `block`.
  //
  //   block   destination buffer; advanced as coefficients are written
  //   rhs     sub-mapper giving access to the image-patch expression
  //   depth   number of coefficients to pack per column
  //   cols    number of columns to pack
  //   stride  must be 0 (asserted)
  //   offset  must be 0 (asserted)
  //
  // Columns are handled four at a time; for each group the depth range
  // [0, peeled_k) is packed with packets and the tail [peeled_k, depth) with
  // scalars. Columns left over after the groups of four are copied one by one.
  EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0) const {
    eigen_assert(stride == 0);
    eigen_assert(offset == 0);

    EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
    typedef typename DataMapper::LinearMapper LinearMapper;
    typedef typename packet_traits<Scalar>::type Packet;

    // Largest multiple of 4 not exceeding cols, and largest multiple of
    // packet_size not exceeding depth.
    const Index packet_cols4 = (cols/4) * 4;
    const Index peeled_k = (depth/packet_size) * packet_size;
    const bool non_standard_patches = rhs.nonStandardPatches();

    for(Index j2=0; j2<packet_cols4; j2+=4)
    {
      // One mapper per column of the current group of four.
      const SubMapper dm0 = rhs.getLinearMapper(0, j2 + 0);
      const SubMapper dm1 = rhs.getLinearMapper(0, j2 + 1);
      const SubMapper dm2 = rhs.getLinearMapper(0, j2 + 2);
      const SubMapper dm3 = rhs.getLinearMapper(0, j2 + 3);

      Index k=0;
      // Vectorized path: needs a packet size that is a multiple of 4 (a
      // 4-packet block is transposed below) and standard patches.
      if((packet_size%4)==0 && !non_standard_patches)
      {
        const Index patch_depth = rhs.patchDepth();
        if ((patch_depth % packet_size) == 0) {
          // The patch depth is a whole number of packets, so a packet never
          // straddles two (row, col) patch positions: padding can be decided
          // once per position instead of once per coefficient.
          const Index patch_cols = rhs.patchCols();
          const Index patch_rows = rhs.patchRows();

          const Index startCol = rhs.colOffset();
          const Index max_cols = std::min<Index>(ceil_div(peeled_k, patch_rows*patch_depth)+startCol, patch_cols);

          for (Index c = startCol; c < max_cols; ++c) {
            eigen_assert(k < peeled_k);
            // Only the first visited column starts at a non-zero row offset.
            const Index startRow = (c == startCol) ? rhs.rowOffset() : 0;
            const Index max_rows = std::min<Index>(ceil_div(peeled_k-c*patch_rows*patch_depth, patch_depth)+startRow, patch_rows);

            const bool pad_col0 = dm0.padCol(c);
            const bool pad_col1 = dm1.padCol(c);
            const bool pad_col2 = dm2.padCol(c);
            const bool pad_col3 = dm3.padCol(c);
            for (Index r = startRow; r < max_rows; ++r) {
              eigen_assert(k < peeled_k);
              // A position is padding if either its column or its row is.
              const bool pad0 = pad_col0 || dm0.padRow(r);
              const bool pad1 = pad_col1 || dm1.padRow(r);
              const bool pad2 = pad_col2 || dm2.padRow(r);
              const bool pad3 = pad_col3 || dm3.padRow(r);

              const Index idx0 = dm0.baseIndex(r, c);
              const Index idx1 = dm1.baseIndex(r, c);
              const Index idx2 = dm2.baseIndex(r, c);
              const Index idx3 = dm3.baseIndex(r, c);

              // Only the very first visited (row, col) position starts at a
              // non-zero depth offset.
              const Index startDepth = ((c == startCol) && (r == startRow)) ? rhs.depthOffset() : 0;
              const Index max_depth = std::min<Index>(peeled_k-c*patch_rows*patch_depth-r*patch_depth+startDepth, patch_depth);
              eigen_assert(max_depth % packet_size == 0);
              for (Index d = startDepth; d < max_depth; d += packet_size) {
                eigen_assert(k < peeled_k);
                // Load one packet per column (zeros for padded positions),
                // transpose the 4-packet block and store it contiguously.
                PacketBlock<Packet, 4> kernel;
                kernel.packet[0] = pad0 ? pset1<Packet>(0) : rhs.packetNoPadding(d, idx0);
                kernel.packet[1] = pad1 ? pset1<Packet>(0) : rhs.packetNoPadding(d, idx1);
                kernel.packet[2] = pad2 ? pset1<Packet>(0) : rhs.packetNoPadding(d, idx2);
                kernel.packet[3] = pad3 ? pset1<Packet>(0) : rhs.packetNoPadding(d, idx3);
                ptranspose(kernel);
                pstoreu(block+0*packet_size, kernel.packet[0]);
                pstoreu(block+1*packet_size, kernel.packet[1]);
                pstoreu(block+2*packet_size, kernel.packet[2]);
                pstoreu(block+3*packet_size, kernel.packet[3]);
                block+=4*packet_size;
                k += packet_size;
              }
            }
          }

          // Packs whatever part of [k, peeled_k) the loops above did not
          // reach, using the mappers' loadPacketFast().
          for(; k<peeled_k; k+=packet_size) {
            PacketBlock<Packet, 4> kernel;
            kernel.packet[0] = dm0.loadPacketFast(k);
            kernel.packet[1] = dm1.loadPacketFast(k);
            kernel.packet[2] = dm2.loadPacketFast(k);
            kernel.packet[3] = dm3.loadPacketFast(k);
            ptranspose(kernel);
            pstoreu(block+0*packet_size, kernel.packet[0]);
            pstoreu(block+1*packet_size, kernel.packet[1]);
            pstoreu(block+2*packet_size, kernel.packet[2]);
            pstoreu(block+3*packet_size, kernel.packet[3]);
            block+=4*packet_size;
          }
        }
        else {
          // Patch depth is not a multiple of the packet size: use the
          // mappers' loadPacketStandard() for every packet.
          for(; k<peeled_k; k+=packet_size) {
            PacketBlock<Packet, 4> kernel;
            kernel.packet[0] = dm0.loadPacketStandard(k);
            kernel.packet[1] = dm1.loadPacketStandard(k);
            kernel.packet[2] = dm2.loadPacketStandard(k);
            kernel.packet[3] = dm3.loadPacketStandard(k);
            ptranspose(kernel);
            pstoreu(block+0*packet_size, kernel.packet[0]);
            pstoreu(block+1*packet_size, kernel.packet[1]);
            pstoreu(block+2*packet_size, kernel.packet[2]);
            pstoreu(block+3*packet_size, kernel.packet[3]);
            block+=4*packet_size;
          }
        }
      }
      // Scalar tail: everything in [k, depth) not packed above, interleaving
      // the four columns coefficient by coefficient.
      if (!rhs.nonStandardPatches()) {
        for(; k<depth; k++)
        {
          block[0] = dm0.loadCoeffStandard(k);
          block[1] = dm1.loadCoeffStandard(k);
          block[2] = dm2.loadCoeffStandard(k);
          block[3] = dm3.loadCoeffStandard(k);
          block += 4;
        }
      }
      else {
        for(; k<depth; k++)
        {
          block[0] = dm0(k);
          block[1] = dm1(k);
          block[2] = dm2(k);
          block[3] = dm3(k);
          block += 4;
        }
      }
    }

    // copy the remaining columns one at a time (nr==1)
    for(Index j2=packet_cols4; j2<cols; ++j2)
    {
      const SubMapper dm0 = rhs.getLinearMapper(0, j2);
      for(Index k=0; k<depth; k++)
      {
        *block = dm0(k);
        block += 1;
      }
    }
  }
};

#endif // 
EIGEN_VECTORIZE +} // end namespace internal + + +/** SpatialConvolution + * \ingroup CXX11_NeuralNetworks_Module + * + * \brief Applies a 2D convolution over a multichannel input image. + * + * The input parameter is expected to be a tensor with a rank of 3 or more (channels, height, width, and optionally others) + * The kernel parameter is expected to be a 4D tensor (filters, channels, kernel_height, kernel_width) + * The input and the kernel must both be in col-major layout. The result will also be in col-major layout. + * + * If in_stride > 1, then applies convolution with holes (aka atrous convolution), sampling every in_stride input pixels. + * + * The result can be assigned to a tensor of rank equal to the rank of the input. The dimensions of the result will be filters, height, width (and others if applicable). + * + * It is possible to swap the order of the width and height dimensions provided that the same order is used in the input, the kernel, and the output. + * + */ +template <typename Input, typename Kernel> +EIGEN_ALWAYS_INLINE +static const typename internal::conditional< + internal::traits<Input>::Layout == ColMajor, + TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions>, const TensorContractionOp<const array<IndexPair<typename internal::traits<Input>::Index>, 1>, const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>, const Kernel>, const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>, const TensorImagePatchOp<Dynamic, Dynamic, const Input> > > >, + TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions>, const TensorContractionOp<const array<IndexPair<typename internal::traits<Input>::Index>, 1>, const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>, const TensorImagePatchOp<Dynamic, Dynamic, const Input> >, const TensorReshapingOp<const DSizes<typename 
internal::traits<Input>::Index, 2>, const Kernel> > > >::type +SpatialConvolution(const Input& input, const Kernel& kernel, const DenseIndex stride = 1, const PaddingType padding_type = PADDING_SAME, const DenseIndex in_stride = 1) { + + typedef typename internal::traits<Input>::Index TensorIndex; + TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input); + TensorRef<Tensor<typename internal::traits<Kernel>::Scalar, internal::traits<Kernel>::NumDimensions, internal::traits<Kernel>::Layout, TensorIndex> > kern(kernel); + + EIGEN_STATIC_ASSERT(internal::traits<Input>::Layout == internal::traits<Kernel>::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE); + static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor); + + static const int NumDims = internal::traits<Input>::NumDimensions; + + // Number of filters to apply. This is the same as the output depth of the result + const TensorIndex kernelFilters = isColMajor ? kern.dimensions()[0] : kern.dimensions()[3]; + // Number of channels. This is the same as the input depth. + const TensorIndex kernelChannels = isColMajor ? kern.dimensions()[1] : kern.dimensions()[2]; + const TensorIndex kernelRows = isColMajor ? kern.dimensions()[2] : kern.dimensions()[1]; + const TensorIndex kernelCols = isColMajor ? kern.dimensions()[3] : kern.dimensions()[0]; + + const DenseIndex kernelRowsEff = kernelRows + (kernelRows - 1) * (in_stride - 1); + const DenseIndex kernelColsEff = kernelCols + (kernelCols - 1) * (in_stride - 1); + + array<IndexPair<TensorIndex>, 1> contract_dims; + contract_dims[0] = IndexPair<TensorIndex>(1, 0); + + const TensorIndex InputRows = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2); + const TensorIndex InputCols = isColMajor ? 
in.dimension(2) : in.dimension(NumDims - 3); + + TensorIndex out_height; + TensorIndex out_width; + switch (padding_type) { + case PADDING_VALID: + out_height = numext::ceil((InputRows - kernelRowsEff + 1.f) / static_cast<float>(stride)); + out_width = numext::ceil((InputCols - kernelColsEff + 1.f) / static_cast<float>(stride)); + break; + case PADDING_SAME: + out_height = numext::ceil(InputRows / static_cast<float>(stride)); + out_width = numext::ceil(InputCols / static_cast<float>(stride)); + break; + default: + eigen_assert(false && "unexpected padding"); + } + + // Molds the output of the patch extraction code into a 2d tensor: + // - the first dimension (dims[0]): the patch values to be multiplied with the kernels + // - the second dimension (dims[1]): everything else + DSizes<TensorIndex, 2> pre_contract_dims; + if (isColMajor) { + pre_contract_dims[0] = kernelChannels * kernelRows * kernelCols; + pre_contract_dims[1] = out_height * out_width; + for (int i = 3; i < NumDims; ++i) { + pre_contract_dims[1] *= in.dimension(i); + } + } else { + pre_contract_dims[1] = kernelChannels * kernelRows * kernelCols; + pre_contract_dims[0] = out_height * out_width; + for (int i = 0; i < NumDims - 3; ++i) { + pre_contract_dims[0] *= in.dimension(i); + } + } + + // Molds the output of the contraction into the shape expected by the used + // (assuming this is ColMajor): + // - 1st dim: kernel filters + // - 2nd dim: output height + // - 3rd dim: output width + // - 4th dim and beyond: everything else including batch size + DSizes<TensorIndex, NumDims> post_contract_dims; + if (isColMajor) { + post_contract_dims[0] = kernelFilters; + post_contract_dims[1] = out_height; + post_contract_dims[2] = out_width; + for (int i = 3; i < NumDims; ++i) { + post_contract_dims[i] = in.dimension(i); + } + } else { + post_contract_dims[NumDims - 1] = kernelFilters; + post_contract_dims[NumDims - 2] = out_height; + post_contract_dims[NumDims - 3] = out_width; + for (int i = 0; i < NumDims - 3; 
++i) { + post_contract_dims[i] = in.dimension(i); + } + } + + DSizes<TensorIndex, 2> kernel_dims; + if (isColMajor) { + kernel_dims[0] = kernelFilters; + kernel_dims[1] = kernelChannels * kernelRows * kernelCols; + } else { + kernel_dims[0] = kernelChannels * kernelRows * kernelCols; + kernel_dims[1] = kernelFilters; + } + // TODO(yangke): choose() is defined in TensorContraction.h -- consider + // moving it to somewhere more "common". + return choose(Cond<internal::traits<Input>::Layout == ColMajor>(), + kernel.reshape(kernel_dims).contract(input.extract_image_patches(kernelRows, kernelCols, stride, stride, in_stride, in_stride, padding_type).reshape(pre_contract_dims), contract_dims).reshape(post_contract_dims), + input.extract_image_patches(kernelRows, kernelCols, stride, stride, in_stride, in_stride, padding_type).reshape(pre_contract_dims).contract(kernel.reshape(kernel_dims), contract_dims).reshape(post_contract_dims)); +} + +} // end namespace Eigen + +#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_EIGEN_SPATIAL_CONVOLUTIONS_H_ diff --git a/tensorflow/core/kernels/eigen_spatial_convolutions_test.cc b/tensorflow/core/kernels/eigen_spatial_convolutions_test.cc new file mode 100644 index 0000000000..f20287e73e --- /dev/null +++ b/tensorflow/core/kernels/eigen_spatial_convolutions_test.cc @@ -0,0 +1,1215 @@ +/* Copyright 2015 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/kernels/eigen_spatial_convolutions.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/eigen_cuboid_convolution.h" +#include "tensorflow/core/platform/test.h" + +namespace Eigen { + +namespace { +void EigenApprox(float a, float b) { + ASSERT_TRUE(std::abs(a - b) <= std::min(std::abs(a), std::abs(b)) * 1e-3); +} +static int ceil_div(int a, int b) { return (a + b - 1) / b; } +} + +TEST(EigenSpatialConvolutionsTest, Simple) { + const int input_depth = 7; + const int input_rows = 4; + const int input_cols = 5; + const int output_depth = 10; + const int patch_rows = 3; + const int patch_cols = 4; + const int output_rows = input_rows; + const int output_cols = input_cols; + + Tensor<float, 3> input(input_depth, input_rows, input_cols); + Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols); + Tensor<float, 3> result(output_depth, output_rows, output_cols); + + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + result = SpatialConvolution(input, kernel); + + EXPECT_EQ(result.dimension(0), output_depth); + EXPECT_EQ(result.dimension(1), output_rows); + EXPECT_EQ(result.dimension(2), output_cols); + + for (int od = 0; od < output_depth; ++od) { + for (int i = 0; i < output_rows; ++i) { + for (int j = 0; j < output_cols; ++j) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int id = 0; id < input_depth; ++id) { + if (r - 1 + i >= 0 && c - 1 + j >= 0 && r - 1 + i < output_rows && + c - 1 + j < output_cols) { + expected += + input(id, r - 1 + i, c - 1 + j) * kernel(od, id, r, c); + } + } + } + } + EigenApprox(result(od, i, j), expected); + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, SimpleRowMajor) { + const int input_depth = 7; + const int input_rows = 4; + const int 
input_cols = 5; + const int output_depth = 10; + const int patch_rows = 3; + const int patch_cols = 4; + const int output_rows = input_rows; + const int output_cols = input_cols; + + Tensor<float, 3, RowMajor> input(input_cols, input_rows, input_depth); + Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth, + output_depth); + Tensor<float, 3, RowMajor> result(output_cols, output_rows, output_depth); + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + result = SpatialConvolution(input, kernel); + + EXPECT_EQ(result.dimension(0), output_cols); + EXPECT_EQ(result.dimension(1), output_rows); + EXPECT_EQ(result.dimension(2), output_depth); + + for (int od = 0; od < output_depth; ++od) { + for (int i = 0; i < output_rows; ++i) { + for (int j = 0; j < output_cols; ++j) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int id = 0; id < input_depth; ++id) { + if (r - 1 + i >= 0 && c - 1 + j >= 0 && r - 1 + i < output_rows && + c - 1 + j < output_cols) { + expected += + input(c - 1 + j, r - 1 + i, id) * kernel(c, r, id, od); + } + } + } + } + EigenApprox(result(j, i, od), expected); + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, BatchedSpatialConvolution) { + Tensor<float, 4> input(10, 5, 5, 13); + Tensor<float, 4> kernel(7, 10, 3, 3); + Tensor<float, 4> result(7, 5, 5, 13); + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + result = SpatialConvolution(input, kernel); + + EXPECT_EQ(result.dimension(0), 7); + EXPECT_EQ(result.dimension(1), 5); + EXPECT_EQ(result.dimension(2), 5); + + for (int b = 0; b < 13; ++b) { + for (int od = 0; od < 7; ++od) { + for (int i = 0; i < 5; ++i) { + for (int j = 0; j < 5; ++j) { + float expected = 0.0f; + for (int c = 0; c < 3; ++c) { + for (int r = 0; r < 3; ++r) { + for (int id = 0; id < 10; ++id) { + 
if (r - 1 + i >= 0 && c - 1 + j >= 0 && r - 1 + i < 5 && + c - 1 + j < 5) { + expected += + input(id, r - 1 + i, c - 1 + j, b) * kernel(od, id, r, c); + } + } + } + } + EigenApprox(result(od, i, j, b), expected); + } + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, BatchedSpatialConvolutionRowMajor) { + Tensor<float, 4, RowMajor> input(13, 5, 5, 10); + Tensor<float, 4, RowMajor> kernel(3, 3, 10, 7); + Tensor<float, 4, RowMajor> result(13, 5, 5, 7); + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + result = SpatialConvolution(input, kernel); + + EXPECT_EQ(result.dimension(1), 5); + EXPECT_EQ(result.dimension(2), 5); + EXPECT_EQ(result.dimension(3), 7); + + for (int b = 0; b < 13; ++b) { + for (int od = 0; od < 7; ++od) { + for (int i = 0; i < 5; ++i) { + for (int j = 0; j < 5; ++j) { + float expected = 0.0f; + for (int c = 0; c < 3; ++c) { + for (int r = 0; r < 3; ++r) { + for (int id = 0; id < 10; ++id) { + if (r - 1 + i >= 0 && c - 1 + j >= 0 && r - 1 + i < 5 && + c - 1 + j < 5) { + expected += + input(b, c - 1 + j, r - 1 + i, id) * kernel(c, r, id, od); + } + } + } + } + EigenApprox(result(b, j, i, od), expected); + } + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, ValidSpatialConvolution) { + const int input_depth = 10; + const int input_rows = 5; + const int input_cols = 5; + const int num_batches = 13; + const int output_depth = 7; + const int patch_rows = 4; + const int patch_cols = 4; + const int output_rows = input_rows - patch_rows + 1; + const int output_cols = input_cols - patch_cols + 1; + + Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches); + Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols); + Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches); + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + // Apply a spatial 
convolution using a 4x4 kernel, valid padding, and a stride + // of 1. + const int stride = 1; + result = SpatialConvolution(input, kernel, stride, PADDING_VALID); + + EXPECT_EQ(result.dimension(0), output_depth); + EXPECT_EQ(result.dimension(1), output_rows); + EXPECT_EQ(result.dimension(2), output_cols); + EXPECT_EQ(result.dimension(3), num_batches); + + for (int b = 0; b < num_batches; ++b) { + for (int od = 0; od < output_depth; ++od) { + for (int i = 0; i < output_rows; ++i) { + for (int j = 0; j < output_cols; ++j) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int id = 0; id < input_depth; ++id) { + expected += input(id, r + i, c + j, b) * kernel(od, id, r, c); + } + } + } + if (result(od, i, j, b) != expected) { + std::cout << "at od=" << od << " b=" << b << " i=" << i + << " j=" << j << " " << result(od, i, j, b) << " vs " + << expected << std::endl; + } + EigenApprox(result(od, i, j, b), expected); + } + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, ValidSpatialConvolutionRowMajor) { + const int input_depth = 10; + const int input_rows = 5; + const int input_cols = 5; + const int num_batches = 13; + const int output_depth = 7; + const int patch_rows = 4; + const int patch_cols = 4; + const int output_rows = input_rows - patch_rows + 1; + const int output_cols = input_cols - patch_cols + 1; + + Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows, + input_depth); + Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth, + output_depth); + Tensor<float, 4, RowMajor> result(num_batches, output_cols, output_rows, + output_depth); + + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + // Apply a spatial convolution using a 4x4 kernel, valid padding, and a stride + // of 1. 
+ const int stride = 1; + result = SpatialConvolution(input, kernel, stride, PADDING_VALID); + + EXPECT_EQ(result.dimension(0), num_batches); + EXPECT_EQ(result.dimension(1), output_cols); + EXPECT_EQ(result.dimension(2), output_rows); + EXPECT_EQ(result.dimension(3), output_depth); + + for (int b = 0; b < num_batches; ++b) { + for (int od = 0; od < output_depth; ++od) { + for (int i = 0; i < output_rows; ++i) { + for (int j = 0; j < output_cols; ++j) { + float expected = 0.0f; + for (int c = 0; c < patch_rows; ++c) { + for (int r = 0; r < patch_cols; ++r) { + for (int id = 0; id < input_depth; ++id) { + expected += input(b, c + j, r + i, id) * kernel(c, r, id, od); + } + } + } + if (result(b, j, i, od) != expected) { + std::cout << "at od=" << od << " b=" << b << " i=" << i + << " j=" << j << " " << result(b, j, i, od) << " vs " + << expected << std::endl; + } + EigenApprox(result(b, j, i, od), expected); + } + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, StridedSpatialConvolution) { + const int input_depth = 10; + const int input_rows = 5; + const int input_cols = 5; + const int num_batches = 13; + const int output_depth = 7; + const int patch_rows = 3; + const int patch_cols = 3; + const int output_rows = 2; + const int output_cols = 2; + + Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches); + Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols); + Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches); + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + // Apply a spatial convolution using a 3x3 kernel, valid padding, and a stride + // of 2. 
+ int stride = 2; + result = SpatialConvolution(input, kernel, stride, PADDING_VALID); + + EXPECT_EQ(result.dimension(0), output_depth); + EXPECT_EQ(result.dimension(1), output_rows); + EXPECT_EQ(result.dimension(2), output_cols); + EXPECT_EQ(result.dimension(3), num_batches); + + for (int b = 0; b < num_batches; ++b) { + for (int od = 0; od < output_depth; ++od) { + for (int i = 0; i < output_rows; ++i) { + for (int j = 0; j < output_cols; ++j) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int id = 0; id < input_depth; ++id) { + expected += input(id, r + stride * i, c + stride * j, b) * + kernel(od, id, r, c); + } + } + } + EigenApprox(result(od, i, j, b), expected); + } + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, StridedSpatialConvolutionRowMajor) { + const int input_depth = 10; + const int input_rows = 5; + const int input_cols = 5; + const int num_batches = 13; + const int output_depth = 7; + const int patch_rows = 3; + const int patch_cols = 3; + const int output_rows = 2; + const int output_cols = 2; + + Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows, + input_depth); + Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth, + output_depth); + Tensor<float, 4, RowMajor> result(num_batches, output_cols, output_rows, + output_depth); + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + // Apply a spatial convolution using a 3x3 kernel, valid padding, and a stride + // of 2. 
+ int stride = 2; + result = SpatialConvolution(input, kernel, stride, PADDING_VALID); + + EXPECT_EQ(result.dimension(0), num_batches); + EXPECT_EQ(result.dimension(1), output_cols); + EXPECT_EQ(result.dimension(2), output_rows); + EXPECT_EQ(result.dimension(3), output_depth); + + for (int b = 0; b < num_batches; ++b) { + for (int od = 0; od < output_depth; ++od) { + for (int i = 0; i < output_rows; ++i) { + for (int j = 0; j < output_cols; ++j) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int id = 0; id < input_depth; ++id) { + expected += input(b, c + stride * j, r + stride * i, id) * + kernel(c, r, id, od); + } + } + } + EigenApprox(result(b, j, i, od), expected); + } + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, AtrousSpatial) { + const int input_depth = 10; + const int input_rows = 7; + const int input_cols = 7; + const int num_batches = 13; + const int output_depth = 7; + const int patch_rows = 3; + const int patch_cols = 3; + const int output_rows = 3; + const int output_cols = 3; + + Tensor<float, 4> input(input_depth, input_rows, input_cols, num_batches); + Tensor<float, 4> kernel(output_depth, input_depth, patch_rows, patch_cols); + Tensor<float, 4> result(output_depth, output_rows, output_cols, num_batches); + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + // Apply a spatial convolution using a 3x3 kernel, valid padding + // output (standard) stride 1, and input (atrous) stride of 2. 
+ int stride = 1; + int in_stride = 2; + result = SpatialConvolution(input, kernel, stride, PADDING_VALID, in_stride); + + EXPECT_EQ(result.dimension(0), output_depth); + EXPECT_EQ(result.dimension(1), output_rows); + EXPECT_EQ(result.dimension(2), output_cols); + EXPECT_EQ(result.dimension(3), num_batches); + + for (int b = 0; b < num_batches; ++b) { + for (int od = 0; od < output_depth; ++od) { + for (int i = 0; i < output_rows; ++i) { + for (int j = 0; j < output_cols; ++j) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int id = 0; id < input_depth; ++id) { + expected += input(id, in_stride * r + stride * i, + in_stride * c + stride * j, b) * + kernel(od, id, r, c); + } + } + } + EigenApprox(result(od, i, j, b), expected); + } + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, AtrousSpatialRowMajor) { + const int input_depth = 10; + const int input_rows = 7; + const int input_cols = 7; + const int num_batches = 13; + const int output_depth = 7; + const int patch_rows = 3; + const int patch_cols = 3; + const int output_rows = 3; + const int output_cols = 3; + + Tensor<float, 4, RowMajor> input(num_batches, input_cols, input_rows, + input_depth); + Tensor<float, 4, RowMajor> kernel(patch_cols, patch_rows, input_depth, + output_depth); + Tensor<float, 4, RowMajor> result(num_batches, output_cols, output_rows, + output_depth); + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + // Apply a spatial convolution using a 3x3 kernel, valid padding + // output (standard) stride 1, and input (atrous) stride of 2. 
+ int stride = 1; + int in_stride = 2; + result = SpatialConvolution(input, kernel, stride, PADDING_VALID, in_stride); + + EXPECT_EQ(result.dimension(0), num_batches); + EXPECT_EQ(result.dimension(1), output_cols); + EXPECT_EQ(result.dimension(2), output_rows); + EXPECT_EQ(result.dimension(3), output_depth); + + for (int b = 0; b < num_batches; ++b) { + for (int od = 0; od < output_depth; ++od) { + for (int i = 0; i < output_rows; ++i) { + for (int j = 0; j < output_cols; ++j) { + float expected = 0.0f; + for (int c = 0; c < patch_cols; ++c) { + for (int r = 0; r < patch_rows; ++r) { + for (int id = 0; id < input_depth; ++id) { + expected += input(b, in_stride * c + stride * j, + in_stride * r + stride * i, id) * + kernel(c, r, id, od); + } + } + } + EigenApprox(result(b, j, i, od), expected); + } + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, Cuboid) { + const int in_channels = 10; + const int in_depth = 5; + const int in_rows = 8; + const int in_cols = 7; + + const int kern_filters = 7; + const int kern_depth = 3; + const int kern_width = 4; + const int kern_height = 4; + + const int out_depth = in_depth; + const int out_height = in_rows; + const int out_width = in_cols; + + Tensor<float, 4> input(in_channels, in_depth, in_rows, in_cols); + Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height, + kern_width); + Tensor<float, 4> result(kern_filters, out_depth, out_height, out_width); + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + result = CuboidConvolution(input, kernel); + + EXPECT_EQ(result.dimension(0), kern_filters); + EXPECT_EQ(result.dimension(1), out_depth); + EXPECT_EQ(result.dimension(2), out_height); + EXPECT_EQ(result.dimension(3), out_width); + + const int off_p = kern_depth / 2; + const int off_r = kern_height / 2; + const int off_c = kern_width / 2; + + for (int od = 0; od < kern_filters; ++od) { + for (int i = 0; i < out_depth; ++i) { + for 
(int j = 0; j < out_height; ++j) { + for (int k = 0; k < out_width; ++k) { + float expected = 0.0f; + for (int c = 0; c < kern_width; ++c) { + for (int r = 0; r < kern_height; ++r) { + for (int p = 0; p < kern_depth; ++p) { + for (int id = 0; id < in_channels; ++id) { + if (p - off_p + i >= 0 && r - off_r + j >= 0 && + c - off_c + k >= 0 && p - off_p + i < in_depth && + r - off_r + j < in_rows && c - off_c + k < in_cols) { + expected += + input(id, p - off_p + i, r - off_r + j, c - off_c + k) * + kernel(od, id, p, r, c); + } + } + } + } + } + EigenApprox(result(od, i, j, k), expected); + } + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, CuboidRowMajor) { + const int in_channels = 10; + const int in_depth = 5; + const int in_rows = 8; + const int in_cols = 7; + + const int kern_filters = 7; + const int kern_depth = 3; + const int kern_width = 4; + const int kern_height = 4; + + const int out_depth = in_depth; + const int out_height = in_rows; + const int out_width = in_cols; + + Tensor<float, 4, RowMajor> input(in_cols, in_rows, in_depth, in_channels); + Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth, + in_channels, kern_filters); + Tensor<float, 4, RowMajor> result(out_width, out_height, out_depth, + kern_filters); + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + result = CuboidConvolution(input, kernel); + + EXPECT_EQ(result.dimension(3), kern_filters); + EXPECT_EQ(result.dimension(2), out_depth); + EXPECT_EQ(result.dimension(1), out_height); + EXPECT_EQ(result.dimension(0), out_width); + + const int off_p = kern_depth / 2; + const int off_r = kern_height / 2; + const int off_c = kern_width / 2; + + for (int od = 0; od < kern_filters; ++od) { + for (int i = 0; i < out_depth; ++i) { + for (int j = 0; j < out_height; ++j) { + for (int k = 0; k < out_width; ++k) { + float expected = 0.0f; + for (int c = 0; c < kern_width; ++c) { + for (int r = 0; r < 
kern_height; ++r) { + for (int p = 0; p < kern_depth; ++p) { + for (int id = 0; id < in_channels; ++id) { + if (p - off_p + i >= 0 && r - off_r + j >= 0 && + c - off_c + k >= 0 && p - off_p + i < in_depth && + r - off_r + j < in_rows && c - off_c + k < in_cols) { + expected += + input(c - off_c + k, r - off_r + j, p - off_p + i, id) * + kernel(c, r, p, id, od); + } + } + } + } + } + EigenApprox(result(k, j, i, od), expected); + } + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, ValidCuboid) { + const int in_channels = 10; + const int in_depth = 5; + const int in_rows = 5; + const int in_cols = 5; + + const int kern_filters = 7; + const int kern_depth = 3; + const int kern_width = 3; + const int kern_height = 3; + + const int out_depth = 3; + const int out_height = 3; + const int out_width = 3; + + Tensor<float, 4> input(in_channels, in_depth, in_rows, in_cols); + Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height, + kern_width); + Tensor<float, 4> result(kern_filters, out_depth, out_height, out_width); + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + result = CuboidConvolution(input, kernel, 1, 1, 1, PADDING_VALID); + + EXPECT_EQ(result.dimension(0), kern_filters); + EXPECT_EQ(result.dimension(1), out_depth); + EXPECT_EQ(result.dimension(2), out_height); + EXPECT_EQ(result.dimension(3), out_width); + + for (int od = 0; od < kern_filters; ++od) { + for (int i = 0; i < out_depth; ++i) { + for (int j = 0; j < out_height; ++j) { + for (int k = 0; k < out_width; ++k) { + float expected = 0.0f; + for (int c = 0; c < kern_width; ++c) { + for (int r = 0; r < kern_height; ++r) { + for (int p = 0; p < kern_depth; ++p) { + for (int id = 0; id < in_channels; ++id) { + expected += + input(id, p + i, r + j, c + k) * kernel(od, id, p, r, c); + } + } + } + } + EigenApprox(result(od, i, j, k), expected); + } + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, 
ValidCuboidRowMajor) { + const int in_channels = 10; + const int in_depth = 5; + const int in_rows = 5; + const int in_cols = 5; + + const int kern_filters = 7; + const int kern_depth = 3; + const int kern_width = 3; + const int kern_height = 3; + + const int out_depth = 3; + const int out_height = 3; + const int out_width = 3; + + Tensor<float, 4, RowMajor> input(in_cols, in_rows, in_depth, in_channels); + Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth, + in_channels, kern_filters); + Tensor<float, 4, RowMajor> result(out_width, out_height, out_depth, + kern_filters); + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + result = CuboidConvolution(input, kernel, 1, 1, 1, PADDING_VALID); + + EXPECT_EQ(result.dimension(3), kern_filters); + EXPECT_EQ(result.dimension(2), out_depth); + EXPECT_EQ(result.dimension(1), out_height); + EXPECT_EQ(result.dimension(0), out_width); + + for (int od = 0; od < kern_filters; ++od) { + for (int i = 0; i < out_depth; ++i) { + for (int j = 0; j < out_height; ++j) { + for (int k = 0; k < out_width; ++k) { + float expected = 0.0f; + for (int c = 0; c < kern_width; ++c) { + for (int r = 0; r < kern_height; ++r) { + for (int p = 0; p < kern_depth; ++p) { + for (int id = 0; id < in_channels; ++id) { + expected += + input(c + k, r + j, p + i, id) * kernel(c, r, p, id, od); + } + } + } + } + EigenApprox(result(k, j, i, od), expected); + } + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, BatchedCuboid) { + const int batches = 2; + const int in_channels = 10; + const int in_depth = 5; + const int in_rows = 8; + const int in_cols = 7; + + const int kern_filters = 7; + const int kern_depth = 3; + const int kern_width = 4; + const int kern_height = 4; + + const int out_depth = in_depth; + const int out_height = in_rows; + const int out_width = in_cols; + + Tensor<float, 5> input(in_channels, in_depth, in_rows, in_cols, batches); + Tensor<float, 
5> kernel(kern_filters, in_channels, kern_depth, kern_height, + kern_width); + Tensor<float, 5> result(kern_filters, out_depth, out_height, out_width, + batches); + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + result = CuboidConvolution(input, kernel); + + EXPECT_EQ(result.dimension(0), kern_filters); + EXPECT_EQ(result.dimension(1), out_depth); + EXPECT_EQ(result.dimension(2), out_height); + EXPECT_EQ(result.dimension(3), out_width); + EXPECT_EQ(result.dimension(4), batches); + + const int off_p = kern_depth / 2; + const int off_r = kern_height / 2; + const int off_c = kern_width / 2; + + for (int b = 0; b < batches; b++) { + for (int od = 0; od < kern_filters; ++od) { + for (int i = 0; i < out_depth; ++i) { + for (int j = 0; j < out_height; ++j) { + for (int k = 0; k < out_width; ++k) { + float expected = 0.0f; + for (int c = 0; c < kern_width; ++c) { + for (int r = 0; r < kern_height; ++r) { + for (int p = 0; p < kern_depth; ++p) { + for (int id = 0; id < in_channels; ++id) { + if (p - off_p + i >= 0 && r - off_r + j >= 0 && + c - off_c + k >= 0 && p - off_p + i < in_depth && + r - off_r + j < in_rows && c - off_c + k < in_cols) { + expected += input(id, p - off_p + i, r - off_r + j, + c - off_c + k, b) * + kernel(od, id, p, r, c); + } + } + } + } + } + EigenApprox(result(od, i, j, k, b), expected); + } + } + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, BatchedCuboidRowMajor) { + const int batches = 2; + const int in_channels = 10; + const int in_depth = 5; + const int in_rows = 8; + const int in_cols = 7; + + const int kern_filters = 7; + const int kern_depth = 3; + const int kern_width = 4; + const int kern_height = 4; + + const int out_depth = in_depth; + const int out_height = in_rows; + const int out_width = in_cols; + + Tensor<float, 5, RowMajor> input(batches, in_cols, in_rows, in_depth, + in_channels); + Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, 
kern_depth, + in_channels, kern_filters); + Tensor<float, 5, RowMajor> result(batches, out_width, out_height, out_depth, + kern_filters); + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + result = CuboidConvolution(input, kernel); + + EXPECT_EQ(result.dimension(4), kern_filters); + EXPECT_EQ(result.dimension(3), out_depth); + EXPECT_EQ(result.dimension(2), out_height); + EXPECT_EQ(result.dimension(1), out_width); + EXPECT_EQ(result.dimension(0), batches); + + const int off_p = kern_depth / 2; + const int off_r = kern_height / 2; + const int off_c = kern_width / 2; + + for (int b = 0; b < batches; b++) { + for (int od = 0; od < kern_filters; ++od) { + for (int i = 0; i < out_depth; ++i) { + for (int j = 0; j < out_height; ++j) { + for (int k = 0; k < out_width; ++k) { + float expected = 0.0f; + for (int c = 0; c < kern_width; ++c) { + for (int r = 0; r < kern_height; ++r) { + for (int p = 0; p < kern_depth; ++p) { + for (int id = 0; id < in_channels; ++id) { + if (p - off_p + i >= 0 && r - off_r + j >= 0 && + c - off_c + k >= 0 && p - off_p + i < in_depth && + r - off_r + j < in_rows && c - off_c + k < in_cols) { + expected += input(b, c - off_c + k, r - off_r + j, + p - off_p + i, id) * + kernel(c, r, p, id, od); + } + } + } + } + } + EigenApprox(result(b, k, j, i, od), expected); + } + } + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, StridedValidCuboid) { + const int in_channels = 10; + const int in_depth = 8; + const int in_rows = 7; + const int in_cols = 5; + + const int kern_filters = 7; + const int kern_depth = 3; + const int kern_width = 3; + const int kern_height = 3; + + const int out_depth = 3; + const int out_height = 3; + const int out_width = 2; + + Tensor<float, 4> input(in_channels, in_depth, in_rows, in_cols); + Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height, + kern_width); + Tensor<float, 4> result(kern_filters, out_depth, out_height, 
out_width); + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + const int stride = 2; + result = + CuboidConvolution(input, kernel, stride, stride, stride, PADDING_VALID); + + EXPECT_EQ(result.dimension(0), kern_filters); + EXPECT_EQ(result.dimension(1), out_depth); + EXPECT_EQ(result.dimension(2), out_height); + EXPECT_EQ(result.dimension(3), out_width); + + for (int od = 0; od < kern_filters; ++od) { + for (int i = 0; i < out_depth; ++i) { + for (int j = 0; j < out_height; ++j) { + for (int k = 0; k < out_width; ++k) { + float expected = 0.0f; + for (int c = 0; c < kern_width; ++c) { + for (int r = 0; r < kern_height; ++r) { + for (int p = 0; p < kern_depth; ++p) { + for (int id = 0; id < in_channels; ++id) { + expected += input(id, p + stride * i, r + stride * j, + c + stride * k) * + kernel(od, id, p, r, c); + } + } + } + } + EigenApprox(result(od, i, j, k), expected); + } + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, StridedValidCuboidRowMajor) { + const int in_channels = 10; + const int in_depth = 8; + const int in_rows = 7; + const int in_cols = 5; + + const int kern_filters = 7; + const int kern_depth = 3; + const int kern_width = 3; + const int kern_height = 3; + + const int out_depth = 3; + const int out_height = 3; + const int out_width = 2; + + Tensor<float, 4, RowMajor> input(in_cols, in_rows, in_depth, in_channels); + Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth, + in_channels, kern_filters); + Tensor<float, 4, RowMajor> result(out_width, out_height, out_depth, + kern_filters); + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + const int stride = 2; + result = + CuboidConvolution(input, kernel, stride, stride, stride, PADDING_VALID); + + EXPECT_EQ(result.dimension(3), kern_filters); + EXPECT_EQ(result.dimension(2), out_depth); + EXPECT_EQ(result.dimension(1), 
out_height); + EXPECT_EQ(result.dimension(0), out_width); + + for (int od = 0; od < kern_filters; ++od) { + for (int i = 0; i < out_depth; ++i) { + for (int j = 0; j < out_height; ++j) { + for (int k = 0; k < out_width; ++k) { + float expected = 0.0f; + for (int c = 0; c < kern_width; ++c) { + for (int r = 0; r < kern_height; ++r) { + for (int p = 0; p < kern_depth; ++p) { + for (int id = 0; id < in_channels; ++id) { + expected += input(c + stride * k, r + stride * j, + p + stride * i, id) * + kernel(c, r, p, id, od); + } + } + } + } + EigenApprox(result(k, j, i, od), expected); + } + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, StridedSameCuboid) { + const int in_channels = 10; + const int in_depth = 8; + const int in_rows = 7; + const int in_cols = 5; + + const int kern_filters = 7; + const int kern_depth = 3; + const int kern_width = 3; + const int kern_height = 3; + + const int stride = 2; + const int out_depth = ceil_div(in_depth, stride); + const int out_height = ceil_div(in_rows, stride); + const int out_width = ceil_div(in_cols, stride); + + Tensor<float, 4> input(in_channels, in_depth, in_rows, in_cols); + Tensor<float, 5> kernel(kern_filters, in_channels, kern_depth, kern_height, + kern_width); + Tensor<float, 4> result(kern_filters, out_depth, out_height, out_width); + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + result = + CuboidConvolution(input, kernel, stride, stride, stride, PADDING_SAME); + + EXPECT_EQ(result.dimension(0), kern_filters); + EXPECT_EQ(result.dimension(1), out_depth); + EXPECT_EQ(result.dimension(2), out_height); + EXPECT_EQ(result.dimension(3), out_width); + + const int pad_p = out_depth * stride - in_depth + kern_depth - 1; + const int pad_r = out_height * stride - in_rows + kern_height - 1; + const int pad_c = out_width * stride - in_cols + kern_width - 1; + + // Number of pixels the input is extended with at the lower end in every + // 
dimension. + const int dp = pad_p - pad_p / 2; + const int dr = pad_r - pad_r / 2; + const int dc = pad_c - pad_c / 2; + + for (int od = 0; od < kern_filters; ++od) { + for (int i = 0; i < out_depth; ++i) { + for (int j = 0; j < out_height; ++j) { + for (int k = 0; k < out_width; ++k) { + float expected = 0.0f; + for (int c = 0; c < kern_width; ++c) { + for (int r = 0; r < kern_height; ++r) { + for (int p = 0; p < kern_depth; ++p) { + for (int id = 0; id < in_channels; ++id) { + const int in_p = p - dp + i * stride; + const int in_r = r - dr + j * stride; + const int in_c = c - dc + k * stride; + if (in_p >= 0 && in_r >= 0 && in_c >= 0 && in_p < in_depth && + in_r < in_rows && in_c < in_cols) { + expected += + input(id, in_p, in_r, in_c) * kernel(od, id, p, r, c); + } + } + } + } + } + EigenApprox(result(od, i, j, k), expected); + } + } + } + } +} + +TEST(EigenSpatialConvolutionsTest, StridedSameCuboidRowMajor) { + const int in_channels = 10; + const int in_depth = 8; + const int in_rows = 7; + const int in_cols = 5; + + const int kern_filters = 7; + const int kern_depth = 3; + const int kern_width = 3; + const int kern_height = 3; + + const int stride = 2; + const int out_depth = ceil_div(in_depth, stride); + const int out_height = ceil_div(in_rows, stride); + const int out_width = ceil_div(in_cols, stride); + + Tensor<float, 4, RowMajor> input(in_cols, in_rows, in_depth, in_channels); + Tensor<float, 5, RowMajor> kernel(kern_width, kern_height, kern_depth, + in_channels, kern_filters); + Tensor<float, 4, RowMajor> result(out_width, out_height, out_depth, + kern_filters); + input = input.constant(11.0f) + input.random(); + kernel = kernel.constant(2.0f) + kernel.random(); + result.setRandom(); + + result = + CuboidConvolution(input, kernel, stride, stride, stride, PADDING_SAME); + + EXPECT_EQ(result.dimension(3), kern_filters); + EXPECT_EQ(result.dimension(2), out_depth); + EXPECT_EQ(result.dimension(1), out_height); + EXPECT_EQ(result.dimension(0), out_width); + 
+ const int pad_p = out_depth * stride - in_depth + kern_depth - 1; + const int pad_r = out_height * stride - in_rows + kern_height - 1; + const int pad_c = out_width * stride - in_cols + kern_width - 1; + + // Number of pixels the input is extended with at the lower end in every + // dimension. + const int dp = pad_p - pad_p / 2; + const int dr = pad_r - pad_r / 2; + const int dc = pad_c - pad_c / 2; + + for (int od = 0; od < kern_filters; ++od) { + for (int i = 0; i < out_depth; ++i) { + for (int j = 0; j < out_height; ++j) { + for (int k = 0; k < out_width; ++k) { + float expected = 0.0f; + for (int c = 0; c < kern_width; ++c) { + for (int r = 0; r < kern_height; ++r) { + for (int p = 0; p < kern_depth; ++p) { + for (int id = 0; id < in_channels; ++id) { + const int in_p = p - dp + i * stride; + const int in_r = r - dr + j * stride; + const int in_c = c - dc + k * stride; + if (in_p >= 0 && in_r >= 0 && in_c >= 0 && in_p < in_depth && + in_r < in_rows && in_c < in_cols) { + expected += + input(in_c, in_r, in_p, id) * kernel(c, r, p, id, od); + } + } + } + } + } + EigenApprox(result(k, j, i, od), expected); + } + } + } + } +} + +// A test case discovered when testing backward spatial convolution where the +// special tensor contraction mapper for spatial convolution contains a bug. +TEST(EigenSpatialConvolutionsTest, SpatialConvContractionMapper) { + // We have a 3x4 input image with 2x2 patch and stride of 2. + // The output has size 1x2. 
+ typedef Tensor<float, 1>::DimensionPair DimPair; + Tensor<float, 4> out(1, 1, 2, 1); + Tensor<float, 4> kern(1, 1, 2, 2); + for (int i = 0; i < kern.size(); ++i) { + kern.coeffRef(i) = static_cast<float>(i) + 1; + } + for (int i = 0; i < out.size(); ++i) { + out.coeffRef(i) = static_cast<float>(i) + 1; + } + + DSizes<ptrdiff_t, 4> strides; + strides[0] = 1; + strides[1] = 2; + strides[2] = 2; + strides[3] = 1; + + array<std::pair<ptrdiff_t, ptrdiff_t>, 4> paddings; + paddings[0] = std::make_pair(0, 0); + paddings[1] = std::make_pair(1, 2); + paddings[2] = std::make_pair(1, 1); + paddings[3] = std::make_pair(0, 0); + + DSizes<ptrdiff_t, 3> out_dim; + out_dim[0] = 1; + out_dim[1] = 4; + out_dim[2] = 12; + + array<bool, 4> kernel_reverse; + kernel_reverse[0] = false; + kernel_reverse[1] = false; + kernel_reverse[2] = true; + kernel_reverse[3] = true; + + DSizes<ptrdiff_t, 3> k_dims; + k_dims[0] = 1; + k_dims[1] = 1; + k_dims[2] = 4; + + array<DimPair, 2> contract_dims; + contract_dims[0] = DimPair(0, 0); + contract_dims[1] = DimPair(2, 1); + + DSizes<ptrdiff_t, 4> in_dim; + in_dim[0] = 1; + in_dim[1] = 3; + in_dim[2] = 4; + in_dim[3] = 1; + + DSizes<ptrdiff_t, 2> in_dbg_dim; + in_dbg_dim[0] = 3; + in_dbg_dim[1] = 4; + + DSizes<ptrdiff_t, 2> out_dbg_dim; + out_dbg_dim[0] = 4; + out_dbg_dim[1] = 12; + + // This is the formula for computing the backward prop for input with a + // spatial convolution. 
+ Tensor<float, 4> direct = + kern.reverse(kernel_reverse) + .reshape(k_dims) + .contract( + out.extract_image_patches(2, 2, 1, 1, 1, 1, 2, 2, 1, 2, 1, 1, 0) + .reshape(out_dim), + contract_dims) + .reshape(in_dim); + + Tensor<float, 4> indirect = + kern.reverse(kernel_reverse) + .reshape(k_dims) + .contract( + out.inflate(strides) + .pad(paddings) + .extract_image_patches(2, 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0) + .reshape(out_dim), + contract_dims) + .reshape(in_dim); + + eigen_assert(dimensions_match(direct.dimensions(), indirect.dimensions())); + for (size_t i = 0; i < direct.dimensions().TotalSize(); ++i) { + EigenApprox(direct.data()[i], indirect.data()[i]); + } + EigenApprox(1.0f, direct(0, 0, 0, 0)); + EigenApprox(3.0f, direct(0, 0, 1, 0)); + EigenApprox(2.0f, direct(0, 0, 2, 0)); + EigenApprox(6.0f, direct(0, 0, 3, 0)); + + EigenApprox(2.0f, direct(0, 1, 0, 0)); + EigenApprox(4.0f, direct(0, 1, 1, 0)); + EigenApprox(4.0f, direct(0, 1, 2, 0)); + EigenApprox(8.0f, direct(0, 1, 3, 0)); +} + +} // namespace Eigen diff --git a/tensorflow/core/kernels/maxpooling_op.cc b/tensorflow/core/kernels/maxpooling_op.cc index b6755c61a5..97cf15b5dd 100644 --- a/tensorflow/core/kernels/maxpooling_op.cc +++ b/tensorflow/core/kernels/maxpooling_op.cc @@ -20,7 +20,6 @@ limitations under the License. #include "tensorflow/core/kernels/maxpooling_op.h" #include <vector> -#include "third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/framework/numeric_op.h" @@ -29,6 +28,7 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_slice.h" #include "tensorflow/core/kernels/conv_2d.h" +#include "tensorflow/core/kernels/eigen_pooling.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/kernels/pooling_ops_common.h" #include "tensorflow/core/lib/core/errors.h" diff --git a/tensorflow/core/kernels/maxpooling_op.h b/tensorflow/core/kernels/maxpooling_op.h index f94ed882b7..ec34337efd 100644 --- a/tensorflow/core/kernels/maxpooling_op.h +++ b/tensorflow/core/kernels/maxpooling_op.h @@ -17,8 +17,8 @@ limitations under the License. #define TENSORFLOW_KERNELS_MAXPOOLING_OP_H_ // Functor definition for MaxPoolingOp, must be compilable by nvcc. -#include "third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks" #include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/kernels/eigen_pooling.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { diff --git a/tensorflow/core/kernels/maxpooling_op_gpu.h b/tensorflow/core/kernels/maxpooling_op_gpu.h index b46a339392..4d8d0e7fa7 100644 --- a/tensorflow/core/kernels/maxpooling_op_gpu.h +++ b/tensorflow/core/kernels/maxpooling_op_gpu.h @@ -22,7 +22,6 @@ limitations under the License. #define EIGEN_USE_GPU -#include "third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/core/kernels/pooling_ops_common.h b/tensorflow/core/kernels/pooling_ops_common.h index f9f16d96d8..21396464fb 100644 --- a/tensorflow/core/kernels/pooling_ops_common.h +++ b/tensorflow/core/kernels/pooling_ops_common.h @@ -18,7 +18,6 @@ limitations under the License. 
#include <vector> -#include "third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" diff --git a/tensorflow/core/kernels/pooling_ops_common_gpu.h b/tensorflow/core/kernels/pooling_ops_common_gpu.h index a1d4c4504d..0ef55a9677 100644 --- a/tensorflow/core/kernels/pooling_ops_common_gpu.h +++ b/tensorflow/core/kernels/pooling_ops_common_gpu.h @@ -21,7 +21,6 @@ limitations under the License. #define TENSORFLOW_CORE_KERNELS_POOLING_OPS_COMMON_GPU_H_ #include <vector> -#include "third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/op_kernel.h" diff --git a/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.h b/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.h index 65b4b331d9..056d5a7316 100644 --- a/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.h +++ b/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.h @@ -20,7 +20,6 @@ limitations under the License. #ifndef TENSORFLOW_CORE_KERNELS_RESIZE_NEAREST_NEIGHBOR_OP_GPU_H_ #define TENSORFLOW_CORE_KERNELS_RESIZE_NEAREST_NEIGHBOR_OP_GPU_H_ -#include "third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/platform/types.h" |