// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2015 Benoit Steiner // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef EIGEN_CXX11_NEURAL_NETWORKS_ATTENTION_H #define EIGEN_CXX11_NEURAL_NETWORKS_ATTENTION_H namespace Eigen { /** ExtractGlimpses * \ingroup CXX11_NeuralNetworks_Module * * \brief Extract glimpses from an input tensor. * * The input parameter is expected to be a col-major tensor with a rank of 4 (depth, x, y, and batch). * The width and height parameters specify the extension of the returned glimpses. * The offsets parameter specifies the x, y locations of the center of the glimpses relative to the center of the input image. The vector is expected to contain one IndexPair for each image in the batch dimension. * The normalized boolean indicates if incoming coordinates are normalized so that 0.0 and 1.0 correspond to the minimum and maximum of each height and width dimension. * The centered boolean indicates if incoming coordinates are centered relative to the image, in which case -1.0 and 1.0 correspond to minimum and maximum of each dimension while 0.0 corresponds to the center. * * The result can be assigned to a tensor of rank equal to that of the input. The result will be laid out in col-major order (depth, x, y, batch). * The dimensions of the result will be equal to the dimensions of the input except for width and height which will be equal to the requested glimpse size. */ namespace { template struct GlimpseExtractionOp { GlimpseExtractionOp(const Index width, const Index height, const std::vector >& offsets, const bool normalized, const bool centered, const bool uniform_noise) : width_(width), height_(height), offsets_(offsets), normalized_(normalized), centered_(centered), uniform_noise_(uniform_noise) { } template DSizes dimensions(const Input& input) const { typedef typename internal::traits::Index IndexType; typedef TensorRef::Scalar, 4, internal::traits::Layout, IndexType> > Ref; Ref in(input); DSizes dims = in.dimensions(); dims[0] = in.dimension(0); dims[1] = width_; dims[2] = height_; dims[3] = in.dimension(3); return dims; } template EIGEN_DEVICE_FUNC void eval(const Input& input, Output& output, const Device& device) const { typedef typename internal::traits::Index IndexType; typedef TensorRef::Scalar, 4, internal::traits::Layout, IndexType> > Ref; Ref in(input); const Index num_channels = in.dimension(0); const Index input_width = in.dimension(1); const Index input_height = in.dimension(2); const Index batch_size = in.dimension(3); eigen_assert(input_width > 0); eigen_assert(input_height > 0); for (Index i = 0; i < batch_size; ++i) { float x = offsets_[i].first, y = offsets_[i].second; // Un-normalize coordinates back to pixel space if normalized. if (normalized_) { x *= input_width; y *= input_height; } // Un-center if coordinates are centered on the image center. if (centered_) { x /= 2.0f; y /= 2.0f; x += input_width / 2.0f; y += input_height / 2.0f; } // Remove half of the glimpse window. x -= width_ / 2.0f; y -= height_ / 2.0f; const Index offset_x = (Index) x; const Index offset_y = (Index) y; Index glimpse_width = width_; Index glimpse_height = height_; bool partial_overlap = false; DSizes slice_offset(0, offset_x, offset_y); DSizes slice_extent(num_channels, width_, height_); DSizes base_offset(0, 0, 0); if (offset_x < 0) { slice_offset[1] = 0; glimpse_width = (std::max)(0, width_ + offset_x); slice_extent[1] = glimpse_width; base_offset[1] = width_ - glimpse_width; partial_overlap = true; } else if (offset_x + width_ >= input_width) { glimpse_width = (std::max)(0, input_width - offset_x); slice_extent[1] = glimpse_width; partial_overlap = true; } if (offset_y < 0) { slice_offset[2] = 0; glimpse_height = (std::max)(0, height_ + offset_y); slice_extent[2] = glimpse_height; base_offset[2] = height_ - glimpse_height; partial_overlap = true; } else if (offset_y + height_ >= input_height) { glimpse_height = (std::max)(0, input_height - offset_y); slice_extent[2] = glimpse_height; partial_overlap = true; } slice_extent[1] = std::min(input_width, slice_extent[1]); slice_extent[2] = std::min(input_height, slice_extent[2]); if (partial_overlap) { if (uniform_noise_) { // Initialize the glimpse with uniform noise. typedef typename internal::remove_const< typename internal::traits::Scalar>::type Scalar; TensorFixedSize > mini; mini.device(device) = input.template chip<3>(i).minimum(); TensorFixedSize > range; range.device(device) = (input.template chip<3>(i).maximum() - mini).template cast(); DSizes glimpse_size(num_channels, width_, height_); TensorMap > tmp(NULL, glimpse_size); output.template chip<3>(i).device(device) = mini.reshape(Sizes<1,1,1>()).broadcast(glimpse_size) + (tmp.random() * range.reshape(Sizes<1,1,1>()).broadcast(glimpse_size)).template cast(); } else { // Initialize the glimpse with white noise: compute the mean and sigma // of each channel, and use them to shape the gaussian. DSizes glimpse_size(width_, height_); DSizes input_size(input_width, input_height); typedef typename internal::remove_const< typename internal::traits::Scalar>::type Scalar; for (int j = 0; j < num_channels; ++j) { TensorFixedSize > mean; mean.device(device) = input.template chip<3>(i).template chip<0>(j).template cast().mean(); TensorFixedSize > sigma; sigma.device(device) = (input.template chip<3>(i).template chip<0>(j).template cast() - mean.reshape(Sizes<1,1>()).broadcast(input_size)).square().mean().sqrt(); TensorFixedSize > mini; mini.device(device) = input.template chip<3>(i).template chip<0>(j).minimum(); TensorFixedSize > maxi; maxi.device(device) = input.template chip<3>(i).template chip<0>(j).maximum(); TensorMap > tmp(NULL, glimpse_size); output.template chip<3>(i).template chip<0>(j).device(device) = (mean.reshape(Sizes<1,1>()).broadcast(glimpse_size) + (tmp.random(internal::NormalRandomGenerator()) * sigma.reshape(Sizes<1,1>()).broadcast(glimpse_size)).template cast()).cwiseMin(maxi.reshape(Sizes<1,1>()).broadcast(glimpse_size)).cwiseMax(mini.reshape(Sizes<1,1>()).broadcast(glimpse_size)); } } // Copy the part of the glimpse that cover the input image if any. if (glimpse_width == 0 || glimpse_height == 0) { continue; } output.template chip<3>(i).slice(base_offset, slice_extent).device(device) = input.template chip<3>(i).slice(slice_offset, slice_extent); } else { output.template chip<3>(i).device(device) = input.template chip<3>(i).slice(slice_offset, slice_extent); } } } private: const Index width_; const Index height_; const std::vector > offsets_; const bool normalized_; const bool centered_; const bool uniform_noise_; }; } template EIGEN_ALWAYS_INLINE static const TensorCustomUnaryOp::Index>, const Input> ExtractGlimpses(const Input& input, const typename internal::traits::Index width, const typename internal::traits::Index height, const std::vector >& offsets, const bool normalized = true, const bool centered = true, const bool uniform_noise = true) { EIGEN_STATIC_ASSERT(internal::traits::Layout == ColMajor, YOU_MADE_A_PROGRAMMING_MISTAKE); EIGEN_STATIC_ASSERT(internal::traits::NumDimensions == 4, YOU_MADE_A_PROGRAMMING_MISTAKE); typedef typename internal::traits::Index Index; const GlimpseExtractionOp op(width, height, offsets, normalized, centered, uniform_noise); return input.customOp(op); } } // end namespace Eigen #endif // EIGEN_CXX11_NEURAL_NETWORKS_ATTENTION_H