From 9a67bbb8830ed95c339289480cc2074fe28baf4e Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Wed, 25 Jul 2018 11:51:16 -0700
Subject: New container for remote builds with all the newest pip packages
 defined in tensorflow/tools/ci_build/install/install_pip_packages.sh

PiperOrigin-RevId: 206030031
---
 third_party/toolchains/BUILD | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'third_party')

diff --git a/third_party/toolchains/BUILD b/third_party/toolchains/BUILD
index fc3183a754..ec1006fe23 100644
--- a/third_party/toolchains/BUILD
+++ b/third_party/toolchains/BUILD
@@ -17,6 +17,6 @@ platform(
     remote_execution_properties = """
         properties: {
           name: "container-image"
-          value:"docker://gcr.io/asci-toolchain/nosla-ubuntu16_04-tf@sha256:800a7b68cabef15419695c188ed33ed70adf678c2371b97b236f3ae26c38274d"
+          value:"docker://gcr.io/asci-toolchain/nosla-ubuntu16_04-tf@sha256:495a025ed5e273cfa5d53357ef93ac20500c008994e0be106c509f51555fb93c"
        }""",
)
-- 
cgit v1.2.3

From 63563579653c1f0829d460eef5f05963111e08f0 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Thu, 26 Jul 2018 13:16:04 -0700
Subject: Delete unused code in CXX11/src/NeuralNetworks

PiperOrigin-RevId: 206209252
---
 .../eigen3/unsupported/Eigen/CXX11/NeuralNetworks  |  35 -
 .../Eigen/CXX11/src/NeuralNetworks/Activations.h   | 116 ---
 .../Eigen/CXX11/src/NeuralNetworks/Attention.h     | 209 ------
 .../NeuralNetworks/BackwardCuboidConvolutions.h    | 523 --------------
 .../NeuralNetworks/BackwardSpatialConvolutions.h   | 351 ----------
 .../CXX11/src/NeuralNetworks/CuboidConvolution.h   | 179 -----
 .../Eigen/CXX11/src/NeuralNetworks/Patch3d.h       | 240 -------
 .../Eigen/CXX11/src/NeuralNetworks/Pooling.h       | 433 ------------
 .../Eigen/CXX11/src/NeuralNetworks/SoftMax.h       |  83 ---
 .../CXX11/src/NeuralNetworks/SpatialConvolutions.h | 775 ---------------------
 .../src/NeuralNetworks/TensorConvolutionByFFT.h    | 289 --------
 11 files changed, 3233 deletions(-)
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Activations.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Attention.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardCuboidConvolutions.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardSpatialConvolutions.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/CuboidConvolution.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Patch3d.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Pooling.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/SoftMax.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/SpatialConvolutions.h
 delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/TensorConvolutionByFFT.h

(limited to 'third_party')

diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks b/third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks
deleted file mode 100644
index 7741b68d8a..0000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks
+++ /dev/null
@@ -1,35 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 
2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_NEURAL_NETWORKS_MODULE -#define EIGEN_CXX11_NEURAL_NETWORKS_MODULE - -#include "unsupported/Eigen/CXX11/Tensor" - -/** \defgroup CXX11_NeuralNetworks_Module Neural Networks Module - * - * This module provides an efficient implementation of the common primitives - * used by neural networks. - * The primitives are built on top of the tensor library. - * - * \code - * #include - * \endcode - */ - -#include "unsupported/Eigen/CXX11/src/NeuralNetworks/Activations.h" -#include "unsupported/Eigen/CXX11/src/NeuralNetworks/Attention.h" -#include "unsupported/Eigen/CXX11/src/NeuralNetworks/Pooling.h" -#include "unsupported/Eigen/CXX11/src/NeuralNetworks/SoftMax.h" -#include "unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardCuboidConvolutions.h" -#include "unsupported/Eigen/CXX11/src/NeuralNetworks/CuboidConvolution.h" -#include "unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardSpatialConvolutions.h" -#include "unsupported/Eigen/CXX11/src/NeuralNetworks/SpatialConvolutions.h" - -#endif // EIGEN_CXX11_NEURAL_NETWORKS_MODULE diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Activations.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Activations.h deleted file mode 100644 index cbcce9e282..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Activations.h +++ /dev/null @@ -1,116 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Benoit Steiner -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef EIGEN_CXX11_NEURAL_NETWORKS_ACTIVATIONS_H -#define EIGEN_CXX11_NEURAL_NETWORKS_ACTIVATIONS_H - -namespace Eigen { - -/** scalar_sigmoid_fast_derivative_op - * \ingroup CXX11_NeuralNetworks_Module - * \brief Template functor to compute the fast derivative of a sigmoid - * - * Input should be the backpropagated gradient. - * - * \sa class CwiseUnaryOp, Cwise::sigmoid_fast_derivative() - */ -template -struct scalar_sigmoid_fast_derivative_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_fast_derivative_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& y) const { - const T one = T(1); - return (one - y) * y; - } - - template - inline Packet packetOp(const Packet& y) const { - const Packet one = internal::pset1(1); - return internal::pmul(internal::psub(one, y), y); - } -}; - -namespace internal { -template -struct functor_traits > { - enum { - Cost = NumTraits::AddCost * 2 + NumTraits::MulCost, - PacketAccess = packet_traits::HasAdd && packet_traits::HasMul && - packet_traits::HasNegate - }; -}; -} // namespace internal - -/** scalar_tanh_fast_derivative_op - * \ingroup CXX11_NeuralNetworks_Module - * \brief Template functor to compute the fast derivative of a tanh - * - * Input should be the backpropagated gradient. 
- * - * \sa class CwiseUnaryOp, Cwise::tanh_fast_derivative() - */ -template -struct scalar_tanh_fast_derivative_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_fast_derivative_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& y) const { - const T one = T(1); - return one - (y * y); - } - - template - inline Packet packetOp(const Packet& y) const { - const Packet one = internal::pset1(1); - return internal::psub(one, internal::pmul(y, y)); - } -}; - -namespace internal { -template -struct functor_traits > { - enum { - Cost = NumTraits::AddCost * 2 + NumTraits::MulCost * 1, - PacketAccess = packet_traits::HasAdd && packet_traits::HasMul && - packet_traits::HasNegate - }; -}; -} // namespace internal - -/** - * \ingroup CXX11_NeuralNetworks_Module - * \brief Template functor to clip the magnitude of the first scalar. - * - * \sa class CwiseBinaryOp, MatrixBase::Clip - */ -template -struct scalar_clip_op { - EIGEN_EMPTY_STRUCT_CTOR(scalar_clip_op) - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar - operator()(const Scalar& a, const Scalar& b) const { - return numext::mini(numext::maxi(a, -b), b); - } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet - packetOp(const Packet& a, const Packet& b) const { - return internal::pmin(internal::pmax(a, internal::pnegate(b)), b); - } -}; - -namespace internal { -template -struct functor_traits > { - enum { - Cost = NumTraits::AddCost * 3, - PacketAccess = packet_traits::HasMax && - packet_traits::HasMin && - packet_traits::HasNegate - }; -}; -} // namespace internal - -} // end namespace Eigen - -#endif // EIGEN_CXX11_NEURAL_NETWORKS_ACTIVATIONS_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Attention.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Attention.h deleted file mode 100644 index d4bc7a3515..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Attention.h +++ /dev/null @@ -1,209 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Benoit Steiner -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef EIGEN_CXX11_NEURAL_NETWORKS_ATTENTION_H -#define EIGEN_CXX11_NEURAL_NETWORKS_ATTENTION_H - -namespace Eigen { - -/** ExtractGlimpses - * \ingroup CXX11_NeuralNetworks_Module - * - * \brief Extract glimpses from an input tensor. - * - * The input parameter is expected to be a col-major tensor with a rank of 4 (depth, x, y, and batch). - * The width and height parameters specify the extension of the returned glimpses. - * The offsets parameter specifies the x, y locations of the center of the glimpses relative to the center of the input image. The vector is expected to contain one IndexPair for each image in the batch dimension. - * The normalized boolean indicates if incoming coordinates are normalized so that 0.0 and 1.0 correspond to the minimum and maximum of each height and width dimension. - * The centered boolean indicates if incoming coordinates are centered relative to the image, in which case -1.0 and 1.0 correspond to minimum and maximum of each dimension while 0.0 corresponds to the center. - * - * The result can be assigned to a tensor of rank equal to that of the input. The result will be laid out in col-major order (depth, x, y, batch). 
- * The dimensions of the result will be equal to the dimensions of the input except for width and height which will be equal to the requested glimpse size. - */ -namespace { -template -struct GlimpseExtractionOp { - GlimpseExtractionOp(const Index width, const Index height, - const std::vector >& offsets, - const bool normalized, - const bool centered, - const bool uniform_noise) : - width_(width), height_(height), offsets_(offsets), - normalized_(normalized), centered_(centered), uniform_noise_(uniform_noise) { } - - template - DSizes dimensions(const Input& input) const { - typedef typename internal::traits::Index IndexType; - typedef TensorRef::Scalar, 4, - internal::traits::Layout, IndexType> > Ref; - Ref in(input); - - DSizes dims = in.dimensions(); - - dims[0] = in.dimension(0); - dims[1] = width_; - dims[2] = height_; - dims[3] = in.dimension(3); - return dims; - } - - template - EIGEN_DEVICE_FUNC - void eval(const Input& input, Output& output, const Device& device) const - { - typedef typename internal::traits::Index IndexType; - typedef TensorRef::Scalar, 4, - internal::traits::Layout, IndexType> > Ref; - Ref in(input); - - const Index num_channels = in.dimension(0); - const Index input_width = in.dimension(1); - const Index input_height = in.dimension(2); - const Index batch_size = in.dimension(3); - eigen_assert(input_width > 0); - eigen_assert(input_height > 0); - - for (Index i = 0; i < batch_size; ++i) { - float x = offsets_[i].first, y = offsets_[i].second; - - // Un-normalize coordinates back to pixel space if normalized. - if (normalized_) { - x *= input_width; - y *= input_height; - } - // Un-center if coordinates are centered on the image center. - if (centered_) { - x /= 2.0f; - y /= 2.0f; - x += input_width / 2.0f; - y += input_height / 2.0f; - } - // Remove half of the glimpse window. - x -= width_ / 2.0f; - y -= height_ / 2.0f; - - const Index offset_x = (Index) x; - const Index offset_y = (Index) y; - Index glimpse_width = width_; - Index glimpse_height = height_; - bool partial_overlap = false; - DSizes slice_offset(0, offset_x, offset_y); - DSizes slice_extent(num_channels, width_, height_); - DSizes base_offset(0, 0, 0); - - if (offset_x < 0) { - slice_offset[1] = 0; - glimpse_width = (std::max)(0, width_ + offset_x); - slice_extent[1] = glimpse_width; - base_offset[1] = width_ - glimpse_width; - partial_overlap = true; - } else if (offset_x + width_ >= input_width) { - glimpse_width = (std::max)(0, input_width - offset_x); - slice_extent[1] = glimpse_width; - partial_overlap = true; - } - if (offset_y < 0) { - slice_offset[2] = 0; - glimpse_height = (std::max)(0, height_ + offset_y); - slice_extent[2] = glimpse_height; - base_offset[2] = height_ - glimpse_height; - partial_overlap = true; - } else if (offset_y + height_ >= input_height) { - glimpse_height = (std::max)(0, input_height - offset_y); - slice_extent[2] = glimpse_height; - partial_overlap = true; - } - slice_extent[1] = std::min(input_width, slice_extent[1]); - slice_extent[2] = std::min(input_height, slice_extent[2]); - - if (partial_overlap) { - if (uniform_noise_) { - // Initialize the glimpse with uniform noise. 
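          // (The noise is matched to the input's own value range: `mini` and
          // `range`, computed below, hold the per-image minimum and max - min,
          // and the uniform samples are scaled and shifted into that range.)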
- typedef typename internal::remove_const< - typename internal::traits::Scalar>::type Scalar; - TensorFixedSize > mini; - mini.device(device) = input.template chip<3>(i).minimum(); - TensorFixedSize > range; - range.device(device) = - (input.template chip<3>(i).maximum() - mini).template cast(); - - DSizes glimpse_size(num_channels, width_, height_); - TensorMap > tmp(NULL, glimpse_size); - output.template chip<3>(i).device(device) = - mini.reshape(Sizes<1,1,1>()).broadcast(glimpse_size) + - (tmp.random() * range.reshape(Sizes<1,1,1>()).broadcast(glimpse_size)).template cast(); - } else { - // Initialize the glimpse with white noise: compute the mean and sigma - // of each channel, and use them to shape the gaussian. - DSizes glimpse_size(width_, height_); - DSizes input_size(input_width, input_height); - typedef typename internal::remove_const< - typename internal::traits::Scalar>::type Scalar; - - for (int j = 0; j < num_channels; ++j) { - TensorFixedSize > mean; - mean.device(device) = input.template chip<3>(i).template chip<0>(j).template cast().mean(); - TensorFixedSize > sigma; - sigma.device(device) = - (input.template chip<3>(i).template chip<0>(j).template cast() - mean.reshape(Sizes<1,1>()).broadcast(input_size)).square().mean().sqrt(); - TensorFixedSize > mini; - mini.device(device) = input.template chip<3>(i).template chip<0>(j).minimum(); - TensorFixedSize > maxi; - maxi.device(device) = input.template chip<3>(i).template chip<0>(j).maximum(); - - TensorMap > tmp(NULL, glimpse_size); - output.template chip<3>(i).template chip<0>(j).device(device) = - (mean.reshape(Sizes<1,1>()).broadcast(glimpse_size) + - (tmp.random(internal::NormalRandomGenerator()) * sigma.reshape(Sizes<1,1>()).broadcast(glimpse_size)).template cast()).cwiseMin(maxi.reshape(Sizes<1,1>()).broadcast(glimpse_size)).cwiseMax(mini.reshape(Sizes<1,1>()).broadcast(glimpse_size)); - } - } - - // Copy the part of the glimpse that cover the input image if any. 
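          // (A zero-sized overlap means the glimpse lies entirely outside the
          // input image; keep the noise fill generated above and move on to
          // the next image in the batch.)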
- if (glimpse_width == 0 || glimpse_height == 0) { - continue; - } - output.template chip<3>(i).slice(base_offset, slice_extent).device(device) = input.template chip<3>(i).slice(slice_offset, slice_extent); - } else { - output.template chip<3>(i).device(device) = input.template chip<3>(i).slice(slice_offset, slice_extent); - } - } - } - - private: - const Index width_; - const Index height_; - const std::vector > offsets_; - const bool normalized_; - const bool centered_; - const bool uniform_noise_; -}; -} - - -template -EIGEN_ALWAYS_INLINE -static const TensorCustomUnaryOp::Index>, const Input> -ExtractGlimpses(const Input& input, - const typename internal::traits::Index width, - const typename internal::traits::Index height, - const std::vector >& offsets, - const bool normalized = true, const bool centered = true, - const bool uniform_noise = true) -{ - EIGEN_STATIC_ASSERT(internal::traits::Layout == ColMajor, YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT(internal::traits::NumDimensions == 4, YOU_MADE_A_PROGRAMMING_MISTAKE); - - typedef typename internal::traits::Index Index; - const GlimpseExtractionOp op(width, height, offsets, normalized, - centered, uniform_noise); - return input.customOp(op); -} - -} // end namespace Eigen - -#endif // EIGEN_CXX11_NEURAL_NETWORKS_ATTENTION_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardCuboidConvolutions.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardCuboidConvolutions.h deleted file mode 100644 index 12ce23444c..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardCuboidConvolutions.h +++ /dev/null @@ -1,523 +0,0 @@ -#ifndef EIGEN_CXX11_NEURAL_NETWORKS_BACKWARD_CUBOID_CONVOLUTIONS_H -#define EIGEN_CXX11_NEURAL_NETWORKS_BACKWARD_CUBOID_CONVOLUTIONS_H - -#include "Patch3d.h" - -namespace Eigen { - -/** CuboidConvolutionBackwardInput - * \ingroup CXX11_NeuralNetworks_Module - * - * \brief Computes the backprop for the input of a 3D convolution. - * - * The output_backward parameter is expected to be a tensor with a rank of 4 or more (channels, depth, height, width, and optionally others) - * The kernel parameter is expected to be a 5D tensor (filters, channels, kernel_depth, kernel_height, kernel_width) - * output_backward and kernel have to be in the same layout. - * - * The dimensions of the result will be filters, depth, height, width (and others if applicable). - * - * It is possible to swap the order of the depth, width and height dimensions provided that the same order is used in the input, the kernel, and the output. - * - * All dimension orders above are given for col-major, and should be reversed for row-major. 
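 *
 * A brief usage sketch (the shapes, variable names and include path below are
 * illustrative assumptions; col-major layout, SAME forward padding with unit
 * strides so the output grid matches the 10x20x30 input grid):
 * \code
 * #include "third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks"
 * Eigen::Tensor<float, 5> kernel(5, 3, 4, 4, 4);       // filters, channels, kd, kh, kw
 * Eigen::Tensor<float, 5> out_grad(5, 10, 20, 30, 7);  // filters, planes, rows, cols, batch
 * kernel.setRandom();
 * out_grad.setRandom();
 * // Gradient w.r.t. the convolution input: channels, planes, rows, cols, batch.
 * Eigen::Tensor<float, 5> in_grad = Eigen::CuboidConvolutionBackwardInput(
 *     kernel, out_grad, 10, 20, 30);  // inputPlanes, inputRows, inputCols
 * \endcode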
- */ - -template -EIGEN_ALWAYS_INLINE static const typename internal::conditional< - internal::traits::Layout == ColMajor, - TensorReshapingOp< - const DSizes::Index, - internal::traits::NumDimensions>, - const TensorContractionOp< - const array< IndexPair::Index>, 2>, - const TensorReshapingOp< - const DSizes< typename internal::traits::Index, 3>, - const TensorReverseOp, const Kernel> - >, - const TensorReshapingOp< - const DSizes< typename internal::traits::Index, 3>, - const TensorVolumePatchOp - > - > - >, - TensorReshapingOp< - const DSizes::Index, - internal::traits::NumDimensions>, - const TensorContractionOp< - const array< IndexPair::Index>, 2>, - const TensorReshapingOp< - const DSizes< typename internal::traits::Index, 3>, - const TensorVolumePatchOp - >, - const TensorReshapingOp< - const DSizes::Index, 3>, - const TensorReverseOp, const Kernel> - > - > - > ->::type -CuboidConvolutionBackwardInput( - const Kernel& kernel, const OutputBackward& output_backward, - typename internal::traits::Index inputPlanes, - typename internal::traits::Index inputRows, - typename internal::traits::Index inputCols, - const DenseIndex stridePlanes = 1, const DenseIndex strideRows = 1, - const DenseIndex strideCols = 1) { - typedef typename internal::traits::Index TensorIndex; - const TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > kern(kernel); - const TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > out(output_backward); - - EIGEN_STATIC_ASSERT(internal::traits::Layout == internal::traits::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE); - - static const bool isColMajor = (internal::traits::Layout == ColMajor); - - static const int NumDims = internal::traits::NumDimensions; - - // Number of filters to apply. This is the same as the output depth of the result - const TensorIndex kernelFilters = isColMajor ? kern.dimensions()[0] : kern.dimensions()[4]; - // Number of channels. This is the same as the input depth. - const TensorIndex kernelChannels = isColMajor ? kern.dimensions()[1] : kern.dimensions()[3]; - const TensorIndex kernelPlanes = isColMajor ? kern.dimensions()[2] : kern.dimensions()[2]; - const TensorIndex kernelRows = isColMajor ? kern.dimensions()[3] : kern.dimensions()[1]; - const TensorIndex kernelCols = isColMajor ? kern.dimensions()[4] : kern.dimensions()[0]; - - const TensorIndex outputPlanes = isColMajor ? out.dimensions()[1] : out.dimensions()[NumDims - 2]; - const TensorIndex outputRows = isColMajor ? out.dimensions()[2] : out.dimensions()[NumDims - 3]; - const TensorIndex outputCols = isColMajor ? out.dimensions()[3] : out.dimensions()[NumDims - 4]; - - TensorIndex forward_pad_z, forward_pad_y, forward_pad_x; - const TensorIndex size_z = ceil(inputPlanes / static_cast(stridePlanes)); - const TensorIndex size_y = ceil(inputRows / static_cast(strideRows)); - const TensorIndex size_x = ceil(inputCols / static_cast(strideCols)); - - // Infer padding type. - if (size_z == outputPlanes && size_y == outputRows && size_x == outputCols) { - // SAME padding. - const TensorIndex dz = size_z * stridePlanes + kernelPlanes - 1 - inputPlanes; - const TensorIndex dy = size_y * strideRows + kernelRows - 1 - inputRows; - const TensorIndex dx = size_x * strideCols + kernelCols - 1 - inputCols; - - forward_pad_z = dz - dz / 2; - forward_pad_y = dy - dy / 2; - forward_pad_x = dx - dx / 2; - } else { - // VALID padding. 
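      // (No padding was applied in the forward pass, so the backward pass
      // relies entirely on the kernel-size-minus-one borders computed below --
      // in effect a "full" convolution that routes every output gradient back
      // to the input cells that produced it.)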
- forward_pad_z = 0; - forward_pad_y = 0; - forward_pad_x = 0; - } - const TensorIndex padding_ztop = kernelPlanes - 1 - forward_pad_z; - const TensorIndex padding_top = kernelRows - 1 - forward_pad_y; - const TensorIndex padding_left = kernelCols - 1 - forward_pad_x; - - const TensorIndex padding_zbottom = inputPlanes + kernelPlanes - 1 - (outputPlanes - 1) * stridePlanes - 1 - padding_ztop; - const TensorIndex padding_bottom = inputRows + kernelRows - 1 - (outputRows - 1) * strideRows - 1 - padding_top; - const TensorIndex padding_right = inputCols + kernelCols - 1 - (outputCols - 1) * strideCols - 1 - padding_left; - - eigen_assert(padding_ztop >= 0); - eigen_assert(padding_zbottom >= 0); - eigen_assert(padding_top >= 0); - eigen_assert(padding_left >= 0); - eigen_assert(padding_bottom >= 0); - eigen_assert(padding_right >= 0); - - // The kernel has dimensions filters X channels X patch_planes X patch_rows X patch_cols. - // We need to reverse the kernel along the spatial dimensions. - array kernel_reverse; - if (isColMajor) { - kernel_reverse[0] = false; - kernel_reverse[1] = false; - kernel_reverse[2] = true; - kernel_reverse[3] = true; - kernel_reverse[4] = true; - } else { - kernel_reverse[0] = true; - kernel_reverse[1] = true; - kernel_reverse[2] = true; - kernel_reverse[3] = false; - kernel_reverse[4] = false; - } - - DSizes kernel_dims; - if (isColMajor) { - kernel_dims[0] = kernelFilters; - kernel_dims[1] = kernelChannels; - kernel_dims[2] = kernelRows * kernelCols * kernelPlanes; - } else { - kernel_dims[0] = kernelRows * kernelCols * kernelPlanes; - kernel_dims[1] = kernelChannels; - kernel_dims[2] = kernelFilters; - } - - // The output_backward has dimensions out_depth X out_planes X out_rows X out_cols X OTHERS - // When we extract the image patches from output_backward, it will have dimensions: - // out_depth X (patch_planes * patch_rows * patch_cols) X (input_planes * input_rows * input_cols * OTHERS) - DSizes pre_contract_dims; - if (isColMajor) { - pre_contract_dims[0] = kernelFilters; - pre_contract_dims[1] = kernelRows * kernelCols * kernelPlanes; - pre_contract_dims[2] = inputRows * inputCols * inputPlanes; - for (int i = 4; i < NumDims; ++i) { - pre_contract_dims[2] *= out.dimension(i); - } - } else { - pre_contract_dims[2] = kernelFilters; - pre_contract_dims[1] = kernelRows * kernelCols * kernelPlanes; - pre_contract_dims[0] = inputRows * inputCols * inputPlanes; - for (int i = 0; i < NumDims - 4; ++i) { - pre_contract_dims[0] *= out.dimension(i); - } - } - - // We will contract along dimensions (0, 2) in kernel and (0, 1) in - // output_backward, if this is col-major, and - // dimensions (0, 2) in kernel and (1, 2) in output_backward, if this row-major. 
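  // (In both layouts the contraction sums over the filter dimension and the
  // flattened kernel-patch dimension, leaving the channels on one side of the
  // result and the flattened input positions -- times batch -- on the other.)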
- array, 2> contract_dims; - if (isColMajor) { - // col-major: kernel.contract(output.patches) - contract_dims[0] = IndexPair(0, 0); - contract_dims[1] = IndexPair(2, 1); - } else { - // row-major: output.patches.contract(kernel) - contract_dims[0] = IndexPair(1, 0); - contract_dims[1] = IndexPair(2, 2); - } - - // Post contraction, the dimensions of the input_backprop is - // channels X input_planes X input_rows X input_cols X OTHERS - DSizes post_contract_dims; - if (isColMajor) { - post_contract_dims[0] = kernelChannels; - post_contract_dims[1] = inputPlanes; - post_contract_dims[2] = inputRows; - post_contract_dims[3] = inputCols; - for (int i = 4; i < NumDims; ++i) { - post_contract_dims[i] = out.dimension(i); - } - } else { - post_contract_dims[NumDims - 1] = kernelChannels; - post_contract_dims[NumDims - 2] = inputPlanes; - post_contract_dims[NumDims - 3] = inputRows; - post_contract_dims[NumDims - 4] = inputCols; - for (int i = 0; i < NumDims - 4; ++i) { - post_contract_dims[i] = out.dimension(i); - } - } - - DSizes strides; - for (int i = 0; i < NumDims; i++) { - strides[i] = 1; - } - if (isColMajor) { - strides[1] = stridePlanes; - strides[2] = strideRows; - strides[3] = strideCols; - } else { - strides[NumDims - 2] = stridePlanes; - strides[NumDims - 3] = strideRows; - strides[NumDims - 4] = strideCols; - } - - return choose( - Cond::Layout == ColMajor>(), - kernel.reverse(kernel_reverse) - .reshape(kernel_dims) - .contract( - output_backward.extract_volume_patches(kernelPlanes, kernelRows, kernelCols, - 1, 1, 1, stridePlanes, strideRows, strideCols, - padding_ztop, padding_zbottom, - padding_top, padding_bottom, - padding_left, padding_right) - .reshape(pre_contract_dims), - contract_dims) - .reshape(post_contract_dims), - output_backward.extract_volume_patches(kernelPlanes, kernelRows, kernelCols, - 1, 1, 1, stridePlanes, strideRows, strideCols, - padding_ztop, padding_zbottom, - padding_top, padding_bottom, - padding_left, padding_right) - .reshape(pre_contract_dims) - .contract(kernel.reverse(kernel_reverse).reshape(kernel_dims), - contract_dims) - .reshape(post_contract_dims)); -} - - -/** CuboidConvolutionBackwardKernel - * \ingroup CXX11_NeuralNetworks_Module - * - * \brief Computes the backprop for the filter of a 3D convolution. - * - * The output_backward parameter is expected to be a tensor with a rank of 4 or more (channels, depth, height, width, and optionally others) - * The kernel parameter is expected to be a 4D tensor (filters, channels, kernel_depth, kernel_height, kernel_width) - * output_backward and kernel have to be in the same layout. - * - * The dimensions of the result will be filters, depth, height, width (and others if applicable). - * - * It is possible to swap the order of the depth, width and height dimensions provided that the same order is used in the input, the kernel, and the output. - * - * All dimension orders above are given for col-major, and should be reversed for row-major. 
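 *
 * A brief usage sketch (shapes and variable names are illustrative
 * assumptions; col-major layout, matching the example shapes used for
 * CuboidConvolutionBackwardInput above):
 * \code
 * // input:    (3, 10, 20, 30, 7)  = channels, planes, rows, cols, batch
 * // out_grad: (5, 10, 20, 30, 7)  = filters,  planes, rows, cols, batch
 * // Gradient w.r.t. the kernel: filters, channels, kd, kh, kw.
 * Eigen::Tensor<float, 5> kernel_grad = Eigen::CuboidConvolutionBackwardKernel(
 *     input, out_grad, 4, 4, 4);  // kernelPlanes, kernelRows, kernelCols
 * \endcode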
- */ -template -EIGEN_ALWAYS_INLINE static const typename internal::conditional< - internal::traits::Layout == ColMajor, - const TensorShufflingOp< - const array::Index, 5>, - const TensorReverseOp< - const array, - const TensorReshapingOp< - const DSizes::Index, 5>, - const TensorContractionOp< - const array< IndexPair::Index>, 2>, - const TensorReshapingOp< - const DSizes::Index, 3>, - const Input>, - const TensorReshapingOp< - const DSizes< typename internal::traits::Index, 4>, - const TensorVolumePatchOp - > - > - > - > - >, - const TensorShufflingOp< - const array::Index, 5>, - const TensorReverseOp< - const array, - const TensorReshapingOp< - const DSizes::Index, 5>, - const TensorContractionOp< - const array< IndexPair::Index>, 2>, - const TensorReshapingOp< - const DSizes< typename internal::traits::Index, 4>, - const TensorVolumePatchOp - >, - const TensorReshapingOp< - const DSizes::Index, 3>, - const Input - > - > - > - > - > ->::type -CuboidConvolutionBackwardKernel( - const Input& input, const OutputBackward& output_backward, - typename internal::traits::Index kernelPlanes, - typename internal::traits::Index kernelRows, - typename internal::traits::Index kernelCols, - const DenseIndex stridePlanes = 1, - const DenseIndex strideRows = 1, - const DenseIndex strideCols = 1) { - typedef typename internal::traits::Index TensorIndex; - TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > in(input); - TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > out(output_backward); - - EIGEN_STATIC_ASSERT(internal::traits::Layout == internal::traits::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE); - - static const bool isColMajor = (internal::traits::Layout == ColMajor); - - static const int NumDims = internal::traits::NumDimensions; - EIGEN_STATIC_ASSERT(internal::traits::NumDimensions == internal::traits::NumDimensions, YOU_MADE_A_PROGRAMMING_MISTAKE); - - const TensorIndex inputPlanes = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2); - const TensorIndex inputRows = isColMajor ? in.dimension(2) : in.dimension(NumDims - 3); - const TensorIndex inputCols = isColMajor ? in.dimension(3) : in.dimension(NumDims - 4); - - const TensorIndex outputPlanes = isColMajor ? out.dimension(1) : out.dimension(NumDims - 2); - const TensorIndex outputRows = isColMajor ? out.dimension(2) : out.dimension(NumDims - 3); - const TensorIndex outputCols = isColMajor ? out.dimension(3) : out.dimension(NumDims - 4); - - const TensorIndex kernelFilters = isColMajor ? out.dimension(0) : out.dimension(NumDims - 1); - const TensorIndex kernelChannels = isColMajor ? in.dimension(0) : in.dimension(NumDims - 1); - - TensorIndex forward_pad_z, forward_pad_y, forward_pad_x; - const TensorIndex size_z = ceil(inputPlanes / static_cast(stridePlanes)); - const TensorIndex size_y = ceil(inputRows / static_cast(strideRows)); - const TensorIndex size_x = ceil(inputCols / static_cast(strideCols)); - - // Infer padding type. - if (size_z == outputPlanes && size_y == outputRows && size_x == outputCols) { - // SAME padding. - const TensorIndex dz = size_z * stridePlanes + kernelPlanes - 1 - inputPlanes; - const TensorIndex dy = size_y * strideRows + kernelRows - 1 - inputRows; - const TensorIndex dx = size_x * strideCols + kernelCols - 1 - inputCols; - - forward_pad_z = dz - dz / 2; - forward_pad_y = dy - dy / 2; - forward_pad_x = dx - dx / 2; - } else { - // VALID padding. 
- forward_pad_z = 0; - forward_pad_y = 0; - forward_pad_x = 0; - } - - const TensorIndex padding_ztop = kernelPlanes - 1 - forward_pad_z; - const TensorIndex padding_top = kernelRows - 1 - forward_pad_y; - const TensorIndex padding_left = kernelCols - 1 - forward_pad_x; - - const TensorIndex padding_zbottom = inputPlanes + kernelPlanes - 1 - (outputPlanes - 1) * stridePlanes - 1 - padding_ztop; - const TensorIndex padding_bottom = inputRows + kernelRows - 1 - (outputRows - 1) * strideRows - 1 - padding_top; - const TensorIndex padding_right = inputCols + kernelCols - 1 - (outputCols - 1) * strideCols - 1 - padding_left; - - eigen_assert(padding_ztop >= 0); - eigen_assert(padding_zbottom >= 0); - eigen_assert(padding_top >= 0); - eigen_assert(padding_left >= 0); - eigen_assert(padding_bottom >= 0); - eigen_assert(padding_right >= 0); - - // The output_backward has dimensions out_depth X out_plaens X out_rows X out_cols X OTHERS - // When we extract the image patches from output_backward (with input as the - // kernel), it will have dimensions - // (out_depth) X (input_planes * input_rows * input_cols) X (kernel_planes * kernel_rows * kernel_cols) X OTHERS - DSizes pre_contract_dims; - if (isColMajor) { - pre_contract_dims[0] = kernelFilters; - pre_contract_dims[1] = inputRows * inputCols * inputPlanes; - pre_contract_dims[2] = kernelRows * kernelCols * kernelPlanes; - pre_contract_dims[3] = 1; - for (int i = 4; i < NumDims; ++i) { - pre_contract_dims[3] *= out.dimension(i); - } - } else { - pre_contract_dims[3] = kernelFilters; - pre_contract_dims[2] = inputRows * inputCols * inputPlanes; - pre_contract_dims[1] = kernelRows * kernelCols * kernelPlanes; - pre_contract_dims[0] = 1; - for (int i = 0; i < NumDims - 4; ++i) { - pre_contract_dims[0] *= out.dimension(i); - } - } - - // The input has dimensions in_depth X (input_planes * input_rows * input_cols) X OTHERS - DSizes input_dims; - if (isColMajor) { - input_dims[0] = kernelChannels; - input_dims[1] = inputRows * inputCols * inputPlanes; - input_dims[2] = 1; - for (int i = 4; i < NumDims; ++i) { - input_dims[2] *= in.dimension(i); - } - eigen_assert(input_dims[2] == pre_contract_dims[3]); - } else { - input_dims[2] = kernelChannels; - input_dims[1] = inputRows * inputCols * inputPlanes; - input_dims[0] = 1; - for (int i = 0; i < NumDims - 4; ++i) { - input_dims[0] *= in.dimension(i); - } - eigen_assert(input_dims[0] == pre_contract_dims[0]); - } - - // We will contract along dimensions (1, 2) in in and (1, 3) in out, if - // this is col-major. - // For row-major, it's dimensions (0, 1) in in and (0, 2) in out. - array, 2> contract_dims; - if (isColMajor) { - // col-major: in.contract(output.patches) - contract_dims[0] = IndexPair(1, 1); - contract_dims[1] = IndexPair(2, 3); - } else { - // row-major: output.patches.contract(in) - contract_dims[0] = IndexPair(0, 0); - contract_dims[1] = IndexPair(2, 1); - } - - // After the contraction, the kernel will have dimension - // in_depth X out_depth X kernel_patches X kernel_rows X kernel_cols - // We will need to shuffle the first two dimensions and reverse the spatial dimensions. - // The end shape is: - // out_depth X in_shape X kernel_planes X kernel_rows X kernel_cols - - // This is the shape of the kernel *before* the shuffling. 
- DSizes kernel_dims; - if (isColMajor) { - kernel_dims[0] = kernelChannels; - kernel_dims[1] = kernelFilters; - kernel_dims[2] = kernelPlanes; - kernel_dims[3] = kernelRows; - kernel_dims[4] = kernelCols; - } else { - kernel_dims[0] = kernelCols; - kernel_dims[1] = kernelRows; - kernel_dims[2] = kernelPlanes; - kernel_dims[3] = kernelFilters; - kernel_dims[4] = kernelChannels; - } - - // Flip filters and channels. - array kernel_shuffle; - if (isColMajor) { - kernel_shuffle[0] = 1; - kernel_shuffle[1] = 0; - kernel_shuffle[2] = 2; - kernel_shuffle[3] = 3; - kernel_shuffle[4] = 4; - } else { - kernel_shuffle[0] = 0; - kernel_shuffle[1] = 1; - kernel_shuffle[2] = 2; - kernel_shuffle[3] = 4; - kernel_shuffle[4] = 3; - } - - // Reverse the spatial dimensions. - array kernel_reverse; - if (isColMajor) { - kernel_reverse[0] = false; - kernel_reverse[1] = false; - kernel_reverse[2] = true; - kernel_reverse[3] = true; - kernel_reverse[4] = true; - } else { - kernel_reverse[0] = true; - kernel_reverse[1] = true; - kernel_reverse[2] = true; - kernel_reverse[3] = false; - kernel_reverse[4] = false; - } - - DSizes strides; - for (int i = 0; i < NumDims; i++) { - strides[i] = 1; - } - if (isColMajor) { - strides[1] = stridePlanes; - strides[2] = strideRows; - strides[3] = strideCols; - } else { - strides[NumDims - 2] = stridePlanes; - strides[NumDims - 3] = strideRows; - strides[NumDims - 4] = strideCols; - } - return choose( - Cond::Layout == ColMajor>(), - input.reshape(input_dims) - .contract( - output_backward.extract_volume_patches( - inputPlanes, inputRows, inputCols, 1, - 1, 1, stridePlanes, strideRows, strideCols, - - padding_ztop, padding_zbottom, padding_top, - padding_bottom, padding_left, padding_right) - .reshape(pre_contract_dims), - contract_dims) - .reshape(kernel_dims) - .reverse(kernel_reverse) - .shuffle(kernel_shuffle), - output_backward.extract_volume_patches( - inputPlanes, inputRows, inputCols, 1, 1, 1, - stridePlanes, strideRows, strideCols, padding_ztop, - padding_zbottom, padding_top, padding_bottom, - padding_left, padding_right) - .reshape(pre_contract_dims) - .contract(input.reshape(input_dims), contract_dims) - .reshape(kernel_dims) - .reverse(kernel_reverse) - .shuffle(kernel_shuffle)); -} - -} // end namespace Eigen - -#endif // EIGEN_CXX11_NEURAL_NETWORKS_BACKWARD_CUBOID_CONVOLUTIONS_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardSpatialConvolutions.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardSpatialConvolutions.h deleted file mode 100644 index 0f4ada246c..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardSpatialConvolutions.h +++ /dev/null @@ -1,351 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2015 Ke Yang -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_NEURAL_NETWORKS_BACKWARD_SPATIAL_CONVOLUTIONS_H -#define EIGEN_CXX11_NEURAL_NETWORKS_BACKWARD_SPATIAL_CONVOLUTIONS_H - -namespace Eigen { - -/** SpatialConvolutionBackwardInput - * \ingroup CXX11_NeuralNetworks_Module - * - * \brief Computes the backprop for the input of a 2D convolution. 
- * - * The output_backward parameter is expected to be a tensor with a rank of 3 or more (channels, height, width, and optionally others) - * The kernel parameter is expected to be a 4D tensor (filters, channels, kernel_height, kernel_width) - * The output_backward and the kernel must both be in col-major layout. The result will also be in col-major layout. - * - * If in_stride > 1, then applies convolution with holes (aka atrous convolution), sampling every in_stride input pixels. - * - * The result can be assigned to a tensor of rank equal to the rank of the output_backward. The dimensions of the result will be filters, height, width (and others if applicable). - * - * It is possible to swap the order of the width and height dimensions provided that the same order is used in the input, the kernel, and the output. - * - */ - -template -EIGEN_ALWAYS_INLINE -static const typename internal::conditional< - internal::traits::Layout == ColMajor, - TensorReshapingOp::Index, internal::traits::NumDimensions>, const TensorContractionOp::Index>, 2>, const TensorReshapingOp::Index, 3>, const TensorReverseOp, const Kernel> >, const TensorReshapingOp::Index, 3>, const TensorImagePatchOp > > >, - TensorReshapingOp::Index, internal::traits::NumDimensions>, const TensorContractionOp::Index>, 2>, const TensorReshapingOp::Index, 3>, const TensorImagePatchOp >, const TensorReshapingOp::Index, 3>, const TensorReverseOp, const Kernel> > > > >::type -SpatialConvolutionBackwardInput(const Kernel& kernel, const OutputBackward& output_backward, typename internal::traits::Index inputRows, typename internal::traits::Index inputCols, const DenseIndex stride = 1, const DenseIndex in_stride = 1) { - - typedef typename internal::traits::Index TensorIndex; - TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > kern(kernel); - TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > out(output_backward); - - EIGEN_STATIC_ASSERT(internal::traits::Layout == internal::traits::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE); - - static const bool isColMajor = (internal::traits::Layout == ColMajor); - - static const int NumDims = internal::traits::NumDimensions; - - // Number of filters to apply. This is the same as the output depth of the result - const TensorIndex kernelFilters = isColMajor ? kern.dimensions()[0] : kern.dimensions()[3]; - // Number of channels. This is the same as the input depth. - const TensorIndex kernelChannels = isColMajor ? kern.dimensions()[1] : kern.dimensions()[2]; - const TensorIndex kernelRows = isColMajor ? kern.dimensions()[2] : kern.dimensions()[1]; - const TensorIndex kernelCols = isColMajor ? kern.dimensions()[3] : kern.dimensions()[0]; - - // This is the effective kernel size, taking into account the (in_stride - 1) zero-values - // inserted between consecutive kernel elements in atrous convolution - const TensorIndex kernelRowsEff = kernelRows + (kernelRows - 1) * (in_stride - 1); - const TensorIndex kernelColsEff = kernelCols + (kernelCols - 1) * (in_stride - 1); - - const TensorIndex outputRows = isColMajor ? output_backward.dimension(1) : output_backward.dimension(NumDims - 2); - const TensorIndex outputCols = isColMajor ? 
output_backward.dimension(2) : output_backward.dimension(NumDims - 3); - - // Computing the forward padding - const TensorIndex forward_pad_top = ((outputRows - 1) * stride + kernelRowsEff - inputRows) / 2; - const TensorIndex forward_pad_left = ((outputCols - 1) * stride + kernelColsEff - inputCols) / 2; - - const TensorIndex padding_top = kernelRowsEff - 1 - forward_pad_top; - const TensorIndex padding_left = kernelColsEff - 1 - forward_pad_left; - const TensorIndex padding_bottom = inputRows + kernelRowsEff - 1 - (outputRows - 1) * stride - 1 - padding_top; - const TensorIndex padding_right = inputCols + kernelColsEff - 1 - (outputCols - 1) * stride - 1 - padding_left; - - eigen_assert(padding_top >= 0); - eigen_assert(padding_left >= 0); - eigen_assert(padding_bottom >= 0); - eigen_assert(padding_right >= 0); - - // The kernel has dimensions filters X channels X patch_rows X patch_cols - // We need to reverse the kernel along dimensions corresponding to rows and - // cols. - // TODO(yangke): we can make things slightly faster by collapsing the dimensions - // where we don't reverse. Try that once we have a faster compiler. - array kernel_reverse; - if (isColMajor) { - kernel_reverse[0] = false; - kernel_reverse[1] = false; - kernel_reverse[2] = true; - kernel_reverse[3] = true; - } else { - kernel_reverse[0] = true; - kernel_reverse[1] = true; - kernel_reverse[2] = false; - kernel_reverse[3] = false; - } - - DSizes kernel_dims; - if (isColMajor) { - kernel_dims[0] = kernelFilters; - kernel_dims[1] = kernelChannels; - kernel_dims[2] = kernelRows * kernelCols; - } else { - kernel_dims[0] = kernelRows * kernelCols; - kernel_dims[1] = kernelChannels; - kernel_dims[2] = kernelFilters; - } - - // The output_backward has dimensions out_depth X out_rows X out_cols X OTHERS - // When we extract the image patches from output_backward, it will have dimensions - // out_depth X (patch_rows * patch_cols) X (input_rows * input_cols * OTHERS) - DSizes pre_contract_dims; - if (isColMajor) { - pre_contract_dims[0] = kernelFilters; - pre_contract_dims[1] = kernelRows * kernelCols; - pre_contract_dims[2] = inputRows * inputCols; - for (int i = 3; i < NumDims; ++i) { - pre_contract_dims[2] *= out.dimension(i); - } - } else { - pre_contract_dims[2] = kernelFilters; - pre_contract_dims[1] = kernelRows * kernelCols; - pre_contract_dims[0] = inputRows * inputCols; - for (int i = 0; i < NumDims - 3; ++i) { - pre_contract_dims[0] *= out.dimension(i); - } - } - - // We will contract along dimensions (0, 2) in kernel and (0, 1) in - // output_backward, if this is col-major, and - // dimensions (0, 2) in kernel and (1, 2) in output_backward, if this row-major. 
- array, 2> contract_dims; - if (isColMajor) { - // col-major: kernel.contract(output.patches) - contract_dims[0] = IndexPair(0, 0); - contract_dims[1] = IndexPair(2, 1); - } else { - // row-major: output.patches.contract(kernel) - contract_dims[0] = IndexPair(1, 0); - contract_dims[1] = IndexPair(2, 2); - } - - // Post contraction, the dimensions of the input_backprop is - // channels X input_rows X input_cols X OTHERS - DSizes post_contract_dims; - if (isColMajor) { - post_contract_dims[0] = kernelChannels; - post_contract_dims[1] = inputRows; - post_contract_dims[2] = inputCols; - for (int i = 3; i < NumDims; ++i) { - post_contract_dims[i] = out.dimension(i); - } - } else { - post_contract_dims[NumDims - 1] = kernelChannels; - post_contract_dims[NumDims - 2] = inputRows; - post_contract_dims[NumDims - 3] = inputCols; - for (int i = 0; i < NumDims - 3; ++i) { - post_contract_dims[i] = out.dimension(i); - } - } - - return choose(Cond::Layout == ColMajor>(), - kernel.reverse(kernel_reverse).reshape(kernel_dims).contract(output_backward.extract_image_patches(kernelRows, kernelCols, 1, 1, in_stride, in_stride, stride, stride, padding_top, padding_bottom, padding_left, padding_right, 0).reshape(pre_contract_dims), contract_dims).reshape(post_contract_dims), - output_backward.extract_image_patches(kernelRows, kernelCols, 1, 1, in_stride, in_stride, stride, stride, padding_top, padding_bottom, padding_left, padding_right, 0).reshape(pre_contract_dims).contract(kernel.reverse(kernel_reverse).reshape(kernel_dims), contract_dims).reshape(post_contract_dims)); -} - - -/** SpatialConvolutionBackwardKernel - * \ingroup CXX11_NeuralNetworks_Module - * - * \brief Computes the backprop for the filter of a 2D convolution. - * - * The output_backward parameter is expected to be a tensor with a rank of 3 or more (channels, height, width, and optionally others) - * The kernel parameter is expected to be a 4D tensor (filters, channels, kernel_height, kernel_width) - * The output_backward and the kernel must both be in col-major layout. The result will also be in col-major layout. - * - * If in_stride > 1, then applies convolution with holes (aka atrous convolution), sampling every in_stride input pixels. - * - * The result can be assigned to a tensor of rank equal to the rank of the output_backward. The dimensions of the result will be filters, height, width (and others if applicable). - * - * It is possible to swap the order of the width and height dimensions provided that the same order is used in the input, the kernel, and the output. - * - */ -// TODO(gpapan): Resolve a bug in TensorContractionInputMapper at SpatialConvolutions.h that yangke circumvented by using .reshape().reshape(). -// This can significantly accelerate SpatialConvolutionBackwardKernel. 
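//
// A brief usage sketch for the two spatial backprop helpers (shapes and
// variable names are illustrative assumptions; col-major layout, 3x32x32
// inputs, 16 filters of size 5x5, SAME forward padding with unit strides):
//
//   Eigen::Tensor<float, 4> input(3, 32, 32, 10);      // channels, rows, cols, batch
//   Eigen::Tensor<float, 4> kernel(16, 3, 5, 5);       // filters, channels, kh, kw
//   Eigen::Tensor<float, 4> out_grad(16, 32, 32, 10);  // filters, rows, cols, batch
//   // Gradient w.r.t. the input (channels, rows, cols, batch):
//   Eigen::Tensor<float, 4> in_grad =
//       Eigen::SpatialConvolutionBackwardInput(kernel, out_grad, 32, 32);
//   // Gradient w.r.t. the kernel (filters, channels, kh, kw):
//   Eigen::Tensor<float, 4> kernel_grad =
//       Eigen::SpatialConvolutionBackwardKernel(input, out_grad, 5, 5);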
- -template -EIGEN_ALWAYS_INLINE -static const typename internal::conditional< - internal::traits::Layout == ColMajor, - const TensorShufflingOp::Index, 4>, const TensorReverseOp, const TensorReshapingOp::Index, 4>, const TensorContractionOp::Index>, 2>, const TensorReshapingOp::Index, 3>, const Input>, const TensorReshapingOp::Index, 4>, const TensorReshapingOp::Index, 4>, const TensorImagePatchOp > > > > > >, - const TensorShufflingOp::Index, 4>, const TensorReverseOp, const TensorReshapingOp::Index, 4>, const TensorContractionOp::Index>, 2>, const TensorReshapingOp::Index, 4>, const TensorReshapingOp::Index, 4>, const TensorImagePatchOp > >, const TensorReshapingOp::Index, 3>, const Input> > > > > >::type -SpatialConvolutionBackwardKernel(const Input& input, const OutputBackward& output_backward, typename internal::traits::Index kernelRows, typename internal::traits::Index kernelCols, const DenseIndex stride = 1, const DenseIndex in_stride = 1) { - - typedef typename internal::traits::Index TensorIndex; - TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > in(input); - TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > out(output_backward); - - EIGEN_STATIC_ASSERT(internal::traits::Layout == internal::traits::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE); - - // stride and in_stride cannot both be larger than 1 - eigen_assert(!(stride > 1 && in_stride > 1)); - - static const bool isColMajor = (internal::traits::Layout == ColMajor); - - static const int NumDims = internal::traits::NumDimensions; - EIGEN_STATIC_ASSERT(internal::traits::NumDimensions == internal::traits::NumDimensions, YOU_MADE_A_PROGRAMMING_MISTAKE); - - const TensorIndex inputRows = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2); - const TensorIndex inputCols = isColMajor ? in.dimension(2) : in.dimension(NumDims - 3); - - const TensorIndex outputRows = isColMajor ? output_backward.dimension(1) : output_backward.dimension(NumDims - 2); - const TensorIndex outputCols = isColMajor ? output_backward.dimension(2) : output_backward.dimension(NumDims - 3); - - // Number of filters to apply. This is the same as the output depth of the result - const TensorIndex kernelFilters = isColMajor ? out.dimensions()[0] : out.dimensions()[NumDims - 1]; - - // Number of channels. This is the same as the input depth. - const TensorIndex kernelChannels = isColMajor ? in.dimensions()[0] : in.dimensions()[NumDims - 1]; - - // This is the effective kernel size, taking into account the (in_stride - 1) zero-values - // inserted between consecutive kernel elements in atrous convolution - const TensorIndex kernelRowsEff = kernelRows + (kernelRows - 1) * (in_stride - 1); - const TensorIndex kernelColsEff = kernelCols + (kernelCols - 1) * (in_stride - 1); - - // Computing the forward padding - const TensorIndex forward_pad_top = ((outputRows - 1) * stride + kernelRowsEff - inputRows) / 2; - const TensorIndex forward_pad_left = ((outputCols - 1) * stride + kernelColsEff - inputCols) / 2; - - // TODO: factor out the padding computation. 
- const TensorIndex padding_top = kernelRowsEff - 1 - forward_pad_top; - const TensorIndex padding_left = kernelColsEff - 1 - forward_pad_left; - const TensorIndex padding_bottom = inputRows + kernelRowsEff - 1 - (outputRows - 1) * stride - 1 - padding_top; - const TensorIndex padding_right = inputCols + kernelColsEff - 1 - (outputCols - 1) * stride - 1 - padding_left; - - eigen_assert(padding_top >= 0); - eigen_assert(padding_left >= 0); - eigen_assert(padding_bottom >= 0); - eigen_assert(padding_right >= 0); - - // The output_backward has dimensions out_depth X out_rows X out_cols X OTHERS - // When we extract the image patches from output_backward (with input as the - // kernel), it will have dimensions - // (out_depth) X (input_rows * input_cols) X (kernel_rows * kernel_cols) X OTHERS - DSizes pre_contract_dims; - if (isColMajor) { - pre_contract_dims[0] = kernelFilters; - pre_contract_dims[1] = inputRows * inputCols; - pre_contract_dims[2] = kernelRows * kernelCols; - pre_contract_dims[3] = 1; - for (int i = 3; i < NumDims; ++i) { - pre_contract_dims[3] *= out.dimension(i); - } - } else { - pre_contract_dims[3] = kernelFilters; - pre_contract_dims[2] = inputRows * inputCols; - pre_contract_dims[1] = kernelRows * kernelCols; - pre_contract_dims[0] = 1; - for (int i = 0; i < NumDims - 3; ++i) { - pre_contract_dims[0] *= out.dimension(i); - } - } - - // The input has dimensions in_depth X (input_rows * input_cols) X OTHERS - DSizes input_dims; - if (isColMajor) { - input_dims[0] = kernelChannels; - input_dims[1] = inputRows * inputCols; - input_dims[2] = 1; - for (int i = 3; i < NumDims; ++i) { - input_dims[2] *= in.dimension(i); - } - eigen_assert(input_dims[2] == pre_contract_dims[3]); - } else { - input_dims[2] = kernelChannels; - input_dims[1] = inputRows * inputCols; - input_dims[0] = 1; - for (int i = 0; i < NumDims - 3; ++i) { - input_dims[0] *= in.dimension(i); - } - eigen_assert(input_dims[0] == pre_contract_dims[0]); - } - - // We will contract along dimensions (1, 2) in and (1, 3) in out, if - // this is col-major. - // For row-major, it's dimensions (0, 1) in and (0, 2) in out. - array, 2> contract_dims; - if (isColMajor) { - // col-major: in.contract(output.patches) - contract_dims[0] = IndexPair(1, 1); - contract_dims[1] = IndexPair(2, 3); - } else { - // row-major: output.patches.contract(in) - contract_dims[0] = IndexPair(0, 0); - contract_dims[1] = IndexPair(2, 1); - } - - // After the contraction, the kernel will have dimension - // in_depth X out_depth X kernel_rows X kernel_cols - // We will need to shuffle the first two dimensions and reverse the latter - // two dimensions. - // The end shape is - // out_depth X in_shape X kernel_rows X kernel_cols - - // This is the shape of the kernel *before* the shuffling. 
- DSizes kernel_dims; - if (isColMajor) { - kernel_dims[0] = kernelChannels; - kernel_dims[1] = kernelFilters; - kernel_dims[2] = kernelRows; - kernel_dims[3] = kernelCols; - } else { - kernel_dims[0] = kernelCols; - kernel_dims[1] = kernelRows; - kernel_dims[2] = kernelFilters; - kernel_dims[3] = kernelChannels; - } - - array kernel_shuffle; - if (isColMajor) { - kernel_shuffle[0] = 1; - kernel_shuffle[1] = 0; - kernel_shuffle[2] = 2; - kernel_shuffle[3] = 3; - } else { - kernel_shuffle[0] = 0; - kernel_shuffle[1] = 1; - kernel_shuffle[2] = 3; - kernel_shuffle[3] = 2; - } - - array kernel_reverse; - if (isColMajor) { - kernel_reverse[0] = false; - kernel_reverse[1] = false; - kernel_reverse[2] = true; - kernel_reverse[3] = true; - } else { - kernel_reverse[0] = true; - kernel_reverse[1] = true; - kernel_reverse[2] = false; - kernel_reverse[3] = false; - } - - return choose(Cond::Layout == ColMajor>(), - input.reshape(input_dims).contract(output_backward.extract_image_patches(inputRows, inputCols, in_stride, in_stride, 1, 1, stride, stride, padding_top, padding_bottom, padding_left, padding_right, 0).reshape(pre_contract_dims).reshape(pre_contract_dims), contract_dims).reshape(kernel_dims).reverse(kernel_reverse).shuffle(kernel_shuffle), - output_backward.extract_image_patches(inputRows, inputCols, in_stride, in_stride, 1, 1, stride, stride, padding_top, padding_bottom, padding_left, padding_right, 0).reshape(pre_contract_dims).reshape(pre_contract_dims).contract(input.reshape(input_dims), contract_dims).reshape(kernel_dims).reverse(kernel_reverse).shuffle(kernel_shuffle)); -} - -} // end namespace Eigen - -#endif // EIGEN_CXX11_NEURAL_NETWORKS_BACKWARD_SPATIAL_CONVOLUTIONS_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/CuboidConvolution.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/CuboidConvolution.h deleted file mode 100644 index dfb9dcedba..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/CuboidConvolution.h +++ /dev/null @@ -1,179 +0,0 @@ -#ifndef EIGEN_CXX11_SRC_NEURAL_NETWORKS_CUBOID_CONVOLUTION_H -#define EIGEN_CXX11_SRC_NEURAL_NETWORKS_CUBOID_CONVOLUTION_H - -#include "Patch3d.h" - -namespace Eigen { - -/** CuboidConvolution - * \ingroup CXX11_NeuralNetworks_Module - * - * \brief Applies a 3D convolution over a multichannel input voxel block. - * - * The input parameter is expected to be a tensor with a rank of 4 or more (channels, depth, height, width, and optionally others). - * The kernel parameter is expected to be a 5D tensor (filters, channels, kernel_depth, kernel_height, kernel_width). - * The result can be assigned to a tensor of rank equal to the rank of the input. The dimensions of the result will be filters, depth, height, width (and others if applicable). - * - * The input and kernel have to be in the same layout, and both row-major and - * col-major are supported. The shapes given above are for col-major layout. - * For row-major, all dimensions should be reversed. - * - * It is possible to swap the order of the depth, width, and height dimensions provided that the same order is used in the input, the kernel, and the output. 
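 *
 * A brief usage sketch (the shapes, variable names and include path below are
 * illustrative assumptions; col-major layout):
 * \code
 * #include "third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks"
 * Eigen::Tensor<float, 5> input(3, 10, 20, 30, 7);  // channels, planes, rows, cols, batch
 * Eigen::Tensor<float, 5> kernel(5, 3, 4, 4, 4);    // filters, channels, kd, kh, kw
 * input.setRandom();
 * kernel.setRandom();
 * // With VALID padding and unit strides the result is (5, 7, 17, 27, 7) =
 * // filters, out_planes, out_rows, out_cols, batch.
 * Eigen::Tensor<float, 5> output =
 *     Eigen::CuboidConvolution(input, kernel, 1, 1, 1, Eigen::PADDING_VALID);
 * \endcode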
- */ -template -EIGEN_ALWAYS_INLINE -static const typename internal::conditional < - internal::traits::Layout == ColMajor, - TensorReshapingOp< - const DSizes::Index, - internal::traits::NumDimensions>, - const TensorContractionOp< - const array::Index>, 1>, - const TensorReshapingOp< - const DSizes::Index, 2>, - const Kernel>, - const TensorReshapingOp< - const DSizes::Index, 2>, - const TensorVolumePatchOp > > >, - TensorReshapingOp< - const DSizes::Index, - internal::traits::NumDimensions>, - const TensorContractionOp< - const array::Index>, 1>, - const TensorReshapingOp< - const DSizes::Index, 2>, - const TensorVolumePatchOp > , - const TensorReshapingOp< - const DSizes::Index, 2>, - const Kernel> > > >::type -CuboidConvolution(const Input& input, const Kernel& kernel, - const DenseIndex stridePlanes = 1, - const DenseIndex strideRows = 1, - const DenseIndex strideCols = 1, - const PaddingType padding_type = PADDING_SAME) { - typedef typename internal::traits::Index TensorIndex; - TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > in(input); - TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > kern(kernel); - - EIGEN_STATIC_ASSERT(internal::traits::Layout == internal::traits::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE); - static const bool isColMajor = (internal::traits::Layout == ColMajor); - static const int NumDims = internal::traits::NumDimensions; - - // Number of filters to apply. This is the same as the output depth of the result. - const TensorIndex kernelFilters = isColMajor ? kern.dimensions()[0] : kern.dimensions()[4]; - const TensorIndex kernelChannels = isColMajor ? kern.dimensions()[1] : kern.dimensions()[3]; - - // Spatial size of the kernel. - const TensorIndex kernelDepth = isColMajor ? kern.dimensions()[2] : kern.dimensions()[2]; - const TensorIndex kernelRows = isColMajor ? kern.dimensions()[3] : kern.dimensions()[1]; - const TensorIndex kernelCols = isColMajor ? kern.dimensions()[4] : kern.dimensions()[0]; - - if (isColMajor) { - eigen_assert(kernelChannels == in.dimension(0)); - } else { - eigen_assert(kernelChannels == in.dimension(NumDims - 1)); - } - - const TensorIndex inputPlanes = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2); - const TensorIndex inputRows = isColMajor ? in.dimension(2) : in.dimension(NumDims - 3); - const TensorIndex inputCols = isColMajor ? 
in.dimension(3) : in.dimension(NumDims - 4); - - const float stride_planes_f = static_cast(stridePlanes); - const float stride_rows_f = static_cast(strideRows); - const float stride_cols_f = static_cast(strideCols); - TensorIndex out_depth; - TensorIndex out_height; - TensorIndex out_width; - switch (padding_type) { - case PADDING_VALID: - out_depth = ceil((inputPlanes - kernelDepth + 1.f) / stride_planes_f); - out_height = ceil((inputRows - kernelRows + 1.f) / stride_rows_f); - out_width = ceil((inputCols - kernelCols + 1.f) / stride_cols_f); - break; - case PADDING_SAME: - out_depth = ceil(inputPlanes / stride_planes_f); - out_height = ceil(inputRows / stride_rows_f); - out_width = ceil(inputCols / stride_cols_f); - break; - default: - eigen_assert(false && "unexpected padding"); - } - - DSizes kernel_dims; - if (isColMajor) { - kernel_dims[0] = kernelFilters; - kernel_dims[1] = kernelChannels * kernelDepth * kernelRows * kernelCols; - } else { - kernel_dims[0] = kernelChannels * kernelDepth * kernelRows * kernelCols; - kernel_dims[1] = kernelFilters; - } - - // Molds the output of the patch extraction result into a 2D tensor: - // - the first dimension (dims[0]): the patch values to be multiplied with the kernels - // - the second dimension (dims[1]): everything else - DSizes pre_contract_dims; - if (isColMajor) { - pre_contract_dims[0] = kernelChannels * kernelDepth * kernelRows * kernelCols; - pre_contract_dims[1] = out_depth * out_height * out_width; - for (int i = 4; i < NumDims; ++i) { - pre_contract_dims[1] *= in.dimension(i); - } - } else { - pre_contract_dims[1] = kernelChannels * kernelDepth * kernelRows * kernelCols; - pre_contract_dims[0] = out_depth * out_height * out_width; - for (int i = 0; i < NumDims - 4; ++i) { - pre_contract_dims[0] *= in.dimension(i); - } - } - - array, 1> contract_dims; - contract_dims[0] = IndexPair(1, 0); - - // Molds the output of the contraction into the shape expected by the user - // (assuming ColMajor): - // - 1st dim: kernel filters - // - 2nd dim: output depth - // - 3nd dim: output height - // - 4rd dim: output width - // - 5th dim and beyond: everything else including batch size - DSizes post_contract_dims; - if (isColMajor) { - post_contract_dims[0] = kernelFilters; - post_contract_dims[1] = out_depth; - post_contract_dims[2] = out_height; - post_contract_dims[3] = out_width; - for (int i = 4; i < NumDims; ++i) { - post_contract_dims[i] = in.dimension(i); - } - } else { - post_contract_dims[NumDims - 1] = kernelFilters; - post_contract_dims[NumDims - 2] = out_depth; - post_contract_dims[NumDims - 3] = out_height; - post_contract_dims[NumDims - 4] = out_width; - for (int i = 0; i < NumDims - 4; ++i) { - post_contract_dims[i] = in.dimension(i); - } - } - - return choose( - Cond::Layout == ColMajor>(), - kernel.reshape(kernel_dims) - .contract(input.extract_volume_patches( - kernelDepth, kernelRows, kernelCols, stridePlanes, - strideRows, strideCols, padding_type) - .reshape(pre_contract_dims), - contract_dims) - .reshape(post_contract_dims), - input.extract_volume_patches(kernelDepth, kernelRows, kernelCols, - stridePlanes, strideRows, strideCols, - padding_type) - .reshape(pre_contract_dims) - .contract(kernel.reshape(kernel_dims), contract_dims) - .reshape(post_contract_dims)); -} - -} // end namespace Eigen - -#endif // EIGEN_CXX11_SRC_NEURAL_NETWORKS_CUBOID_CONVOLUTION_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Patch3d.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Patch3d.h deleted 
file mode 100644 index 2864f83299..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Patch3d.h +++ /dev/null @@ -1,240 +0,0 @@ -#ifndef EIGEN_CXX11_SRC_NEURAL_NETWORKS_PATCH3D_H -#define EIGEN_CXX11_SRC_NEURAL_NETWORKS_PATCH3D_H - -#if not defined(__CUDACC__) -#include -#endif - -namespace Eigen { -namespace internal { - -/** Extract3DPatches - * \ingroup CXX11_NeuralNetworksModule - * - * \brief Extracts 3D patches from a multichannel input volume. - * - * The input parameter is expected to be a tensor with a rank of 4 or more - * (channels, depth, height, width, optional others in col-major, and the - * reverse order in row-major). - - * The return value will be a tensor of 3 more dimension than the input tensor. - * In col-major, the first 4 dimensions of the result are: channels, patch_depth, - * patch_height, patch_width. The next dimensions will identify the patch - * position on the 3D grid of extracted patches: z, y, x. The remaining - * dimensions, if any, will be the same as the 'other' dimensions of the input - * tensor. - */ - -template -EIGEN_ALWAYS_INLINE static const TensorStridingOp< - const array::Index, - internal::traits::NumDimensions + 3>, - const TensorReshapingOp< - const DSizes::Index, - internal::traits::NumDimensions + 3>, - const TensorPatchOp< - const DSizes::Index, - internal::traits::NumDimensions>, - const TensorPaddingOp< - const array::Index>, - internal::traits::NumDimensions>, - const Input> > > > -Extract3DPatches( - const Input& input, const DenseIndex patchPlanes, - const DenseIndex patchRows, const DenseIndex patchCols, - const DenseIndex stridePlanes, const DenseIndex strideRows, - const DenseIndex strideCols, - const DenseIndex paddingZTop, const DenseIndex paddingZBottom, - const DenseIndex paddingTop, const DenseIndex paddingBottom, - const DenseIndex paddingLeft, const DenseIndex paddingRight, - const typename internal::traits::Scalar padding_value = 0) { - - typedef typename internal::traits::Index TensorIndex; - TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > in(input); - - EIGEN_STATIC_ASSERT(internal::traits::NumDimensions >= 4, YOU_MADE_A_PROGRAMMING_MISTAKE); - - static const bool isColMajor = (internal::traits::Layout == ColMajor); - static const int NumDims = internal::traits::NumDimensions; - static const int ExtDims = NumDims + 3; - - // Tensor size after patch extraction. We add three dimensions to unpack the - // linear patch index into a 3D grid over which stride() can work. - DSizes pre_stride_dims; - - if (isColMajor) { - pre_stride_dims[0] = in.dimension(0); - pre_stride_dims[1] = patchPlanes; - pre_stride_dims[2] = patchRows; - pre_stride_dims[3] = patchCols; - } else { - pre_stride_dims[ExtDims - 1] = in.dimension(NumDims - 1); - pre_stride_dims[ExtDims - 4] = patchCols; - pre_stride_dims[ExtDims - 3] = patchRows; - pre_stride_dims[ExtDims - 2] = patchPlanes; - } - - const TensorIndex inputPlanes = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2); - const TensorIndex inputRows = isColMajor ? in.dimension(2) : in.dimension(NumDims - 3); - const TensorIndex inputCols = isColMajor ? in.dimension(3) : in.dimension(NumDims - 4); - - array, NumDims> paddings; - for (int i = 0; i < NumDims; ++i) { - paddings[i] = IndexPair(0, 0); - } - - paddings[isColMajor ? 1 : (NumDims - 2)] = IndexPair(paddingZTop, paddingZBottom); - paddings[isColMajor ? 2 : (NumDims - 3)] = IndexPair(paddingTop, paddingBottom); - paddings[isColMajor ? 
3 : (NumDims - 4)] = IndexPair(paddingLeft, paddingRight); - - pre_stride_dims[isColMajor ? 4 : (ExtDims - 5)] = inputPlanes + paddingZBottom + paddingZTop - patchPlanes + 1; - pre_stride_dims[isColMajor ? 5 : (ExtDims - 6)] = inputRows + paddingTop + paddingBottom - patchRows + 1; - pre_stride_dims[isColMajor ? 6 : (ExtDims - 7)] = inputCols + paddingLeft + paddingRight - patchCols + 1; - - if (isColMajor) { - for (int i = 7; i < NumDims + 3; ++i) { - pre_stride_dims[i] = in.dimension(i - 3); - } - } else { - for (int i = 0; i < NumDims - 4; ++i) { - pre_stride_dims[i] = in.dimension(i); - } - } - - DSizes patch_dims; - if (isColMajor) { - patch_dims[0] = in.dimension(0); - patch_dims[1] = patchPlanes; - patch_dims[2] = patchRows; - patch_dims[3] = patchCols; - for (int i = 4; i < NumDims; ++i) { - patch_dims[i] = 1; - } - } else { - patch_dims[NumDims - 1] = in.dimension(NumDims - 1); - patch_dims[NumDims - 4] = patchCols; - patch_dims[NumDims - 3] = patchRows; - patch_dims[NumDims - 2] = patchPlanes; - for (int i = 0; i < NumDims - 4; i++) { - patch_dims[i] = 1; - } - } - - array strides; - if (isColMajor) { - // No striding within the patches. - for (int i = 0; i < 4; ++i) { - strides[i] = 1; - } - // Apply striding in the spatial patch grid dimensions only. - strides[4] = stridePlanes; - strides[5] = strideRows; - strides[6] = strideCols; - // No striding in the remaining dimensions (batches, ...). - for (int i = 7; i < NumDims + 3; i++) { - strides[i] = 1; - } - } else { - // No striding within the patches. - for (int i = 1; i <= 4; ++i) { - strides[ExtDims - i] = 1; - } - // Apply striding in the spatial patch grid dimensions only. - strides[ExtDims - 7] = strideCols; - strides[ExtDims - 6] = strideRows; - strides[ExtDims - 5] = stridePlanes; - // No striding in the remaining dimensions (batches, ...). - for (int i = 0; i < NumDims - 4; i++) { - strides[i] = 1; - } - } - - // TODO(mjanusz): Consider getting rid of pad(), and stride() and extend - // extract_patches to take additional parameters for padding/striding, - // similarly to extract_image_patches. - return input.pad(paddings, padding_value).extract_patches(patch_dims).reshape(pre_stride_dims).stride(strides); -} - - -template -EIGEN_ALWAYS_INLINE static const TensorStridingOp< - const array::Index, - internal::traits::NumDimensions + 3>, - const TensorReshapingOp< - const DSizes::Index, - internal::traits::NumDimensions + 3>, - const TensorPatchOp< - const DSizes::Index, - internal::traits::NumDimensions>, - const TensorPaddingOp< - const array::Index>, - internal::traits::NumDimensions>, - const Input> > > > -Extract3DPatches( - const Input& input, const DenseIndex patchPlanes, - const DenseIndex patchRows, const DenseIndex patchCols, - const DenseIndex stridePlanes, const DenseIndex strideRows, - const DenseIndex strideCols, const PaddingType padding_type, - const typename internal::traits::Scalar padding_value = 0) { - typedef typename internal::traits::Index TensorIndex; - TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > in(input); - - EIGEN_STATIC_ASSERT(internal::traits::NumDimensions >= 4, YOU_MADE_A_PROGRAMMING_MISTAKE); - - static const bool isColMajor = (internal::traits::Layout == ColMajor); - static const int NumDims = internal::traits::NumDimensions; - - const TensorIndex inputPlanes = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2); - const TensorIndex inputRows = isColMajor ? 
in.dimension(2) : in.dimension(NumDims - 3); - const TensorIndex inputCols = isColMajor ? in.dimension(3) : in.dimension(NumDims - 4); - - switch (padding_type) { - case PADDING_VALID: - // No padding in any dimension. - return Extract3DPatches(input, patchPlanes, patchRows, patchCols, - stridePlanes, strideRows, strideCols, - 0, 0, 0, 0, 0, 0, padding_value); - case PADDING_SAME: { - // The side of the tensor before striding should be just the expected - // output times the stride. - const TensorIndex size_z = ceil(inputPlanes / static_cast(stridePlanes)) * stridePlanes; - const TensorIndex size_y = ceil(inputRows / static_cast(strideRows)) * strideRows; - const TensorIndex size_x = ceil(inputCols / static_cast(strideCols)) * strideCols; - - // The size of the patch space is going to be: padded_input_size - patch_size + 1. - // This has to match the expected size before striding (pre_stride_dims). - // The deltas below extend the input to the expected size. - const TensorIndex dz = size_z + patchPlanes - 1 - inputPlanes; - const TensorIndex dy = size_y + patchRows - 1 - inputRows; - const TensorIndex dx = size_x + patchCols - 1 - inputCols; - - return Extract3DPatches(input, patchPlanes, patchRows, patchCols, - stridePlanes, strideRows, strideCols, - dz - dz / 2, dz / 2, - dy - dy / 2, dy / 2, - dx - dx / 2, dx / 2, - padding_value); - } - default: - eigen_assert(false && "unexpected padding"); - // unreachable code to avoid missing return warning. - return Extract3DPatches(input, patchPlanes, patchRows, patchCols, - stridePlanes, strideRows, strideCols, - 0, 0, 0, 0, 0, 0, padding_value); - } -} - -// TODO(mjanusz): Switch this to a 'using' alias once CUDA supports C++11. -template -struct Extract3DPatchesType { - typedef const TensorStridingOp< const array::Index, internal::traits::NumDimensions + 3>, - const TensorReshapingOp< const DSizes::Index, internal::traits::NumDimensions + 3>, - const TensorPatchOp< const DSizes::Index, internal::traits::NumDimensions>, - const TensorPaddingOp< const array< IndexPair::Index>, internal::traits::NumDimensions>, - const Input> > > > type; -}; - -} // end namespace internal -} // end namespace Eigen - -#endif // EIGEN_CXX11_SRC_NEURAL_NETWORKS_PATCH3D_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Pooling.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Pooling.h deleted file mode 100644 index 942b060ba7..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Pooling.h +++ /dev/null @@ -1,433 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef EIGEN_CXX11_NEURAL_NETWORKS_POOLING_H -#define EIGEN_CXX11_NEURAL_NETWORKS_POOLING_H - -#include "Patch3d.h" - -namespace Eigen { - -/** SpatialMaxPooling - * \ingroup CXX11_NeuralNetworks_Module - * - * \brief Applies a max-pooling over a multichannel input image. - * - * The input parameter is expected to be a with a rank of 4 (channels, height, width, others in col-major, and the reverse of that in row-major). - * - * The result can be assigned to a tensor of rank equal to the rank of the input. The dimensions of the result will be channels, height, width, and others (in col-major, and the reverse of that if the input was row-major). 
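 *
 * As a rough sketch (hypothetical sizes, col-major layout, assuming the
 * NeuralNetworks module header is included):
 *
 * \code
 * // input: channels x height x width x batch
 * Eigen::Tensor<float, 4> input(3, 28, 28, 16);
 * // A 2x2 pooling window with stride 2 and PADDING_VALID halves height and width.
 * Eigen::Tensor<float, 4> pooled(3, 14, 14, 16);
 * pooled = Eigen::SpatialMaxPooling(input, 2, 2, 2, 2, Eigen::PADDING_VALID);
 * \endcode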
- * - * The order of the width and height dimensions can be swapped if needed. - * -*/ -#if !defined(EIGEN_HAS_INDEX_LIST) -template -EIGEN_ALWAYS_INLINE -static const TensorReshapingOp::Index, internal::traits::NumDimensions>, const TensorReductionOp::Scalar>::type>, const Eigen::array, const TensorImagePatchOp > > -#else -template -EIGEN_ALWAYS_INLINE -static const TensorReshapingOp::Index, internal::traits::NumDimensions>, const TensorReductionOp::Scalar>::type>, typename internal::conditional::Layout == ColMajor, const Eigen::IndexList, Eigen::type2index<2> >, const Eigen::IndexList, Eigen::type2index<3> > >::type, const TensorImagePatchOp > > -#endif -SpatialMaxPooling(const Input& input, DenseIndex patchRows, DenseIndex patchCols, - DenseIndex strideRows, DenseIndex strideCols, const PaddingType padding_type, - DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) -{ - EIGEN_STATIC_ASSERT(internal::traits::NumDimensions == 4, YOU_MADE_A_PROGRAMMING_MISTAKE); - - typedef typename internal::traits::Index TensorIndex; - TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > in(input); - - const DenseIndex patchRowsEff = patchRows + (patchRows - 1) * (in_strideRows - 1); - const DenseIndex patchColsEff = patchCols + (patchCols - 1) * (in_strideCols - 1); - - static const bool isColMajor = (internal::traits::Layout == ColMajor); - static const int idxRows = isColMajor ? 1 : 2; - static const int idxCols = isColMajor ? 2 : 1; - - // Molds the output of the reduction into the shape expected by the user. - // (assuming col-major): - // - 1st dim: channels - // - 2nd dim: output height - // - 3rd dim: output width - // - 4th dim and beyond: everything else including batch size - Eigen::DSizes::NumDimensions> post_reduce_dims; - post_reduce_dims[0] = in.dimension(0); - if (padding_type == PADDING_VALID) { - post_reduce_dims[idxRows] = numext::ceil((in.dimension(idxRows) - patchRowsEff + 1.f) / static_cast(strideRows)); - post_reduce_dims[idxCols] = numext::ceil((in.dimension(idxCols) - patchColsEff + 1.f) / static_cast(strideCols)); - } else { - post_reduce_dims[idxRows] = numext::ceil(in.dimension(idxRows) / static_cast(strideRows)); - post_reduce_dims[idxCols] = numext::ceil(in.dimension(idxCols) / static_cast(strideCols)); - } - post_reduce_dims[3] = in.dimension(3); - -#if !defined(EIGEN_HAS_INDEX_LIST) - // nvcc doesn't support cxx11 - Eigen::array reduction_dims; - if (isColMajor) { - reduction_dims[0] = 1; - reduction_dims[1] = 2; - } else { - reduction_dims[0] = 2; - reduction_dims[1] = 3; - } -#else - // Take advantage of cxx11 to give the compiler information it can use to - // optimize the code. - typename internal::conditional::Layout == ColMajor, const Eigen::IndexList, Eigen::type2index<2> >, const Eigen::IndexList, Eigen::type2index<3> > >::type reduction_dims; -#endif - - return input.extract_image_patches(patchRows, patchCols, strideRows, strideCols, in_strideRows, in_strideCols, padding_type, -Eigen::NumTraits::Scalar>::type>::highest()).maximum(reduction_dims).reshape(post_reduce_dims); -} - -/** CuboidMaxPooling - * \ingroup CXX11_NeuralNetworks_Module - * - * \brief Applies a max-pooling over a multichannel input volume. - * - * The input parameter is expected to be a tensor with a rank of 5 (channels, depth, height, width, others in col-major, and the reverse of that in row-major). - * - * The result can be assigned to a tensor of rank equal to the rank of the input. 
The dimensions of the result will be channels, depth, height, width, and others (in col-major, and the reverse of that if the input was row-major). - * - * The order of the depth, width and height dimensions can be swapped if needed. - * -*/ -#if !defined(EIGEN_HAS_INDEX_LIST) -template -EIGEN_ALWAYS_INLINE static const TensorReshapingOp< - const Eigen::DSizes::NumDimensions>, - const TensorReductionOp< - internal::MaxReducer, const Eigen::array, - const TensorReshapingOp< - const Eigen::DSizes, - const TensorVolumePatchOp > > > -#else -template -EIGEN_ALWAYS_INLINE static const TensorReshapingOp< - const Eigen::DSizes::NumDimensions>, - const TensorReductionOp< - internal::MaxReducer, - const Eigen::IndexList >, - const TensorReshapingOp< - const Eigen::DSizes, - const TensorVolumePatchOp > > > -#endif -CuboidMaxPooling(const Input& input, DenseIndex patchPlanes, - DenseIndex patchRows, DenseIndex patchCols, - DenseIndex stridePlanes, DenseIndex strideRows, - DenseIndex strideCols, const PaddingType padding_type) { - EIGEN_STATIC_ASSERT(internal::traits::NumDimensions == 5, YOU_MADE_A_PROGRAMMING_MISTAKE); - static const bool isColMajor = (internal::traits::Layout == ColMajor); - - typedef typename internal::traits::Index TensorIndex; - TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > in(input); - - static const int idxPlanes = isColMajor ? 1 : 3; - static const int idxRows = 2; - static const int idxCols = isColMajor ? 3 : 1; - - // Molds the output of the reduction into the shape expected by the used - // (assuming col-major): - // - 1st dim: channels - // - 2nd dim: output depth - // - 3rd dim: output height - // - 4th dim: output width - // - 5th dim and beyond: everything else including batch size - Eigen::DSizes::NumDimensions> post_reduce_dims; - post_reduce_dims[0] = in.dimension(0); - if (padding_type == PADDING_VALID) { - post_reduce_dims[idxPlanes] = numext::ceil((in.dimension(idxPlanes) - patchPlanes + 1.f) / static_cast(stridePlanes)); - post_reduce_dims[idxRows] = numext::ceil((in.dimension(idxRows) - patchRows + 1.f) / static_cast(strideRows)); - post_reduce_dims[idxCols] = numext::ceil((in.dimension(idxCols) - patchCols + 1.f) / static_cast(strideCols)); - } else { - post_reduce_dims[idxPlanes] = numext::ceil(in.dimension(idxPlanes) / static_cast(stridePlanes)); - post_reduce_dims[idxRows] = numext::ceil(in.dimension(idxRows) / static_cast(strideRows)); - post_reduce_dims[idxCols] = numext::ceil(in.dimension(idxCols) / static_cast(strideCols)); - } - post_reduce_dims[4] = in.dimension(4); - - Eigen::DSizes pre_reduce_dims; - pre_reduce_dims[1] = patchRows * patchCols * patchPlanes; - if (isColMajor) { - pre_reduce_dims[0] = post_reduce_dims[0]; - pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] * post_reduce_dims[3] * post_reduce_dims[4]; - } else { - pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] * post_reduce_dims[2] * post_reduce_dims[3]; - pre_reduce_dims[2] = post_reduce_dims[4]; - } - -#if !defined(EIGEN_HAS_INDEX_LIST) - // nvcc doesn't support cxx11 - Eigen::array reduction_dims; - reduction_dims[0] = 1; -#else - // Take advantage of cxx11 to give the compiler information it can use to - // optimize the code. 
- Eigen::IndexList > reduction_dims; -#endif - return input.extract_volume_patches(patchPlanes, patchRows, patchCols, - stridePlanes, strideRows, strideCols, - padding_type, -Eigen::NumTraits::highest()) - .reshape(pre_reduce_dims) - .maximum(reduction_dims) - .reshape(post_reduce_dims); -} - - -/** SpatialAvgPooling - * \ingroup CXX11_NeuralNetworks_Module - * - * \brief Applies an average pooling over a multichannel input image. - * - * The input parameter is expected to be a tensor with a rank of 4 (channels, height, width, others in col-major, and the reverse of that in row-major). - * - * The result can be assigned to a tensor of rank equal to the rank of the input. The dimensions of the result will be channels, height, width, and others (in col-major, and the reverse of that if the input was row-major). - * - * The order of the width and height dimensions can be swapped if needed. - * -*/ -namespace internal { - -template struct AvgPoolMeanReducer -{ -#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) - // We only support packet access for floats. - static const bool PacketAccess = internal::is_same::value; -#else - static const bool PacketAccess = false; -#endif - static const bool IsStateful = true; - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE AvgPoolMeanReducer() : scalarCount_(0) { - typedef typename packet_traits::type Packet; - packetCount_ = pset1(0.0); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) { - if (t != -Eigen::NumTraits::highest()) { - (*accum) = (*accum) + t; - scalarCount_++; - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const { - return static_cast(0); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const { - eigen_assert(scalarCount_ > 0); - return accum / scalarCount_; - } - -#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__) -#ifdef EIGEN_VECTORIZE_AVX -#define pequal(a,b) _mm256_cmp_ps(a,b,_CMP_EQ_UQ) -#define psel(a,b,false_mask) _mm256_blendv_ps(a,b,false_mask) -#else -#define pequal(a,b) _mm_cmpeq_ps(a,b) -#define psel(a,b,false_mask) _mm_or_ps(_mm_andnot_ps(false_mask, a), _mm_and_ps(false_mask, b)) -#endif - - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) { - reducePacketWithType(static_cast(0), p, accum); - } - - template - void reducePacketWithType(T, const Packet& p, Packet* accum) { - Packet skip_mask = pequal(p, pset1(-Eigen::NumTraits::highest())); - (*accum) = padd(*accum, psel(p, pset1(0), skip_mask)); - packetCount_ = padd(packetCount_, psel(pset1(1), pset1(0), skip_mask)); - } - - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const { - return pset1(0); - } - - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const { - return pdiv(vaccum, packetCount_); - } - template - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const { - return (saccum + predux(vaccum)) / (scalarCount_ + predux(packetCount_)); - } -#endif - - protected: - typedef typename packet_traits::type Packet; - int scalarCount_; - Packet packetCount_; -}; - -} // namespace internal - -#if !defined(EIGEN_HAS_INDEX_LIST) -template -EIGEN_ALWAYS_INLINE -static const TensorReshapingOp::Index, internal::traits::NumDimensions>, const TensorReductionOp::Scalar>::type>, const Eigen::array, const TensorImagePatchOp > > -#else -template -EIGEN_ALWAYS_INLINE -static const TensorReshapingOp::Index, 
internal::traits::NumDimensions>, const TensorReductionOp::Scalar>::type>, typename internal::conditional::Layout == ColMajor, const Eigen::IndexList, Eigen::type2index<2> >, const Eigen::IndexList, Eigen::type2index<3> > >::type, const TensorImagePatchOp > > -#endif -SpatialAvgPooling(const Input& input, DenseIndex patchRows, DenseIndex patchCols, - DenseIndex strideRows, DenseIndex strideCols, const PaddingType padding_type, - DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1) -{ - EIGEN_STATIC_ASSERT(internal::traits::NumDimensions == 4, YOU_MADE_A_PROGRAMMING_MISTAKE); - - typedef typename internal::traits::Index TensorIndex; - TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > in(input); - - const DenseIndex patchRowsEff = patchRows + (patchRows - 1) * (in_strideRows - 1); - const DenseIndex patchColsEff = patchCols + (patchCols - 1) * (in_strideCols - 1); - - static const bool isColMajor = (internal::traits::Layout == ColMajor); - static const int idxRows = isColMajor ? 1 : 2; - static const int idxCols = isColMajor ? 2 : 1; - - // Molds the output of the reduction into the shape expected by the user. - // (assuming col-major): - // - 1st dim: channels - // - 2nd dim: output height - // - 3rd dim: output width - // - 4th dim and beyond: everything else including batch size - Eigen::DSizes::NumDimensions> post_reduce_dims; - post_reduce_dims[0] = in.dimension(0); - if (padding_type == PADDING_VALID) { - post_reduce_dims[idxRows] = numext::ceil((in.dimension(idxRows) - patchRowsEff + 1.f) / static_cast(strideRows)); - post_reduce_dims[idxCols] = numext::ceil((in.dimension(idxCols) - patchColsEff + 1.f) / static_cast(strideCols)); - } else { - post_reduce_dims[idxRows] = numext::ceil(in.dimension(idxRows) / static_cast(strideRows)); - post_reduce_dims[idxCols] = numext::ceil(in.dimension(idxCols) / static_cast(strideCols)); - } - post_reduce_dims[3] = in.dimension(3); - - typedef typename internal::remove_const::Scalar>::type CoeffReturnType; - internal::AvgPoolMeanReducer mean_with_nan; - -#if !defined(EIGEN_HAS_INDEX_LIST) - // nvcc doesn't support cxx11 - Eigen::array reduction_dims; - if (isColMajor) { - reduction_dims[0] = 1; - reduction_dims[1] = 2; - } else { - reduction_dims[0] = 2; - reduction_dims[1] = 3; - } -#else - // Take advantage of cxx11 to give the compiler information it can use to - // optimize the code. - typename internal::conditional::Layout == ColMajor, const Eigen::IndexList, Eigen::type2index<2> >, const Eigen::IndexList, Eigen::type2index<3> > >::type reduction_dims; -#endif - return input.extract_image_patches(patchRows, patchCols, strideRows, strideCols, in_strideRows, in_strideCols, padding_type, -Eigen::NumTraits::Scalar>::type>::highest()).reduce(reduction_dims, mean_with_nan).reshape(post_reduce_dims); -} - - -/** CuboidAvgPooling - * \ingroup CXX11_NeuralNetworks_Module - * - * \brief Applies an average pooling over a multichannel input volume. - * - * The input parameter is expected to be a tensor with a rank of 5 (channels, depth, height, width, others, and the reverse of that in row-major). - * - * The result can be assigned to a tensor of rank equal to the rank of the input. The dimensions of the result will be channels, depth, width, and others (in col-major, and the reverse of that if the input was row-major). - * - * The order of the depth, width and height dimensions can be swapped if needed. 
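 *
 * A minimal, hypothetical col-major sketch (sizes invented for illustration):
 *
 * \code
 * // input: channels x depth x height x width x batch
 * Eigen::Tensor<float, 5> input(4, 16, 16, 16, 8);
 * // A 2x2x2 window with stride 2 in every spatial dimension and PADDING_VALID
 * // halves depth, height, and width.
 * Eigen::Tensor<float, 5> pooled(4, 8, 8, 8, 8);
 * pooled = Eigen::CuboidAvgPooling(input, 2, 2, 2, 2, 2, 2, Eigen::PADDING_VALID);
 * \endcode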
- * -*/ -#if !defined(EIGEN_HAS_INDEX_LIST) -template -EIGEN_ALWAYS_INLINE static const TensorReshapingOp< - const Eigen::DSizes::NumDimensions>, - const TensorReductionOp< - internal::AvgPoolMeanReducer, const Eigen::array, - const TensorReshapingOp< - const Eigen::DSizes, - const TensorVolumePatchOp > > > -#else -template -EIGEN_ALWAYS_INLINE static const TensorReshapingOp< - const Eigen::DSizes::NumDimensions>, - const TensorReductionOp< - internal::AvgPoolMeanReducer, - const Eigen::IndexList >, - const TensorReshapingOp< - const Eigen::DSizes, - const TensorVolumePatchOp > > > -#endif -CuboidAvgPooling(const Input& input, DenseIndex patchPlanes, - DenseIndex patchRows, DenseIndex patchCols, - DenseIndex stridePlanes, DenseIndex strideRows, - DenseIndex strideCols, const PaddingType padding_type) { - EIGEN_STATIC_ASSERT(internal::traits::NumDimensions == 5, YOU_MADE_A_PROGRAMMING_MISTAKE); - static const bool isColMajor = (internal::traits::Layout == ColMajor); - - typedef typename internal::traits::Index TensorIndex; - TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > in(input); - - static const int idxPlanes = isColMajor ? 1 : 3; - static const int idxRows = 2; - static const int idxCols = isColMajor ? 3 : 1; - // Molds the output of the reduction into the shape expected by the used - // (assuming col-major): - // - 1st dim: channels - // - 2nd dim: outupt depth - // - 3rd dim: output height - // - 4th dim: output width - // - 5th dim and beyond: everything else including batch size - Eigen::DSizes::NumDimensions> post_reduce_dims; - post_reduce_dims[0] = in.dimension(0); - if (padding_type == PADDING_VALID) { - post_reduce_dims[idxPlanes] = numext::ceil((in.dimension(idxPlanes) - patchPlanes + 1.f) / static_cast(stridePlanes)); - post_reduce_dims[idxRows] = numext::ceil((in.dimension(idxRows) - patchRows + 1.f) / static_cast(strideRows)); - post_reduce_dims[idxCols] = numext::ceil((in.dimension(idxCols) - patchCols + 1.f) / static_cast(strideCols)); - } else { - post_reduce_dims[idxPlanes] = numext::ceil(in.dimension(idxPlanes) / static_cast(stridePlanes)); - post_reduce_dims[idxRows] = numext::ceil(in.dimension(idxRows) / static_cast(strideRows)); - post_reduce_dims[idxCols] = numext::ceil(in.dimension(idxCols) / static_cast(strideCols)); - } - post_reduce_dims[4] = in.dimension(4); - - Eigen::DSizes pre_reduce_dims; - pre_reduce_dims[1] = patchRows * patchCols * patchPlanes; - if (isColMajor) { - pre_reduce_dims[0] = post_reduce_dims[0]; - pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] * post_reduce_dims[3] * post_reduce_dims[4]; - } else { - pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] * post_reduce_dims[2] * post_reduce_dims[3]; - pre_reduce_dims[2] = post_reduce_dims[4]; - } - - typedef typename internal::remove_const::Scalar>::type CoeffReturnType; - internal::AvgPoolMeanReducer mean_with_nan; - -#if !defined(EIGEN_HAS_INDEX_LIST) - // nvcc doesn't support cxx11 - Eigen::array reduction_dims; - reduction_dims[0] = 1; -#else - // Take advantage of cxx11 to give the compiler information it can use to - // optimize the code. 
- Eigen::IndexList > reduction_dims; -#endif - return input.extract_volume_patches(patchPlanes, patchRows, patchCols, - stridePlanes, strideRows, strideCols, - padding_type, -Eigen::NumTraits::highest()) - .reshape(pre_reduce_dims) - .reduce(reduction_dims, mean_with_nan) - .reshape(post_reduce_dims); -} - -} // end namespace Eigen - -#endif // EIGEN_CXX11_NEURAL_NETWORKS_POOLING_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/SoftMax.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/SoftMax.h deleted file mode 100644 index f0e21ab9c2..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/SoftMax.h +++ /dev/null @@ -1,83 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef EIGEN_CXX11_NEURAL_NETWORKS_SOFTMAX_H -#define EIGEN_CXX11_NEURAL_NETWORKS_SOFTMAX_H - -namespace Eigen { - -/** SoftMax - * \ingroup CXX11_NeuralNetworks_Module - * - * \brief Applies a softmax - * - * The input parameter is expected to be a col-major tensor with a rank of 2 (depth and other). - * - * The result can be assigned to a tensor of rank and dimensions equal to that of the input. The result will be laid out in col-major order. - * -*/ - -namespace { -class SoftmaxOp { - public: - EIGEN_ALWAYS_INLINE SoftmaxOp(const float beta) : beta_(beta) { } - - template EIGEN_ALWAYS_INLINE - typename Input::Dimensions dimensions(const Input& input) const { - return input.dimensions(); - } - - template - void eval(const Input& input, Output& output, const Device& device) const - { -#if !defined(EIGEN_HAS_INDEX_LIST) - // nvcc doesn't support cxx11 - Eigen::array::Index, 1> depth_dim; - depth_dim[0] = 0; - Eigen::array::Index, 2> bcast; - bcast[0] = dimensions(input)[0]; - bcast[1] = 1; - DSizes::Index, 2> dims2d; - dims2d[0] = 1; - dims2d[1] = dimensions(input)[1]; -#else - // Take advantage of cxx11 to give the compiler information it can use to - // optimize the code. 
- Eigen::IndexList> depth_dim; - Eigen::IndexList> bcast; - bcast.set(0, dimensions(input)[0]); - Eigen::IndexList, typename internal::traits::Index> dims2d; - dims2d.set(1, dimensions(input)[1]); -#endif - - output.device(device) = ((input - input.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast)) * beta_).exp(); - output.device(device) = output / (output.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast)); - } - - private: - const float beta_; -}; -} - - -template -EIGEN_ALWAYS_INLINE -static const TensorCustomUnaryOp -SoftMax(const Input& input, const float beta) -{ - EIGEN_STATIC_ASSERT(internal::traits::Layout == ColMajor, YOU_MADE_A_PROGRAMMING_MISTAKE); - EIGEN_STATIC_ASSERT(internal::traits::NumDimensions == 2, YOU_MADE_A_PROGRAMMING_MISTAKE); - - const SoftmaxOp op(beta); - return input.customOp(op); -} - - -} // end namespace Eigen - -#endif // EIGEN_CXX11_NEURAL_NETWORKS_SOFTMAX_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/SpatialConvolutions.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/SpatialConvolutions.h deleted file mode 100644 index 8e2ddca6b5..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/SpatialConvolutions.h +++ /dev/null @@ -1,775 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2014 Benoit Steiner -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef EIGEN_CXX11_NEURAL_NETWORKS_SPATIAL_CONVOLUTIONS_H -#define EIGEN_CXX11_NEURAL_NETWORKS_SPATIAL_CONVOLUTIONS_H - -namespace Eigen { - -namespace internal { - -// These optimizations require vector instructions -#ifdef EIGEN_VECTORIZE - -// TODO: Consolidate this part of the code with the image patch extraction code -// since they are both very similar. 
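// The specializations below let the contraction that backs SpatialConvolution
// read patch values straight out of the original input tensor: rather than
// materializing the result of extract_image_patches(), the input mapper
// translates each (depth, patch) coordinate into an index of the underlying
// input on the fly, returning zero for coordinates that fall into the padding.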
-template -class TensorContractionInputMapper >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> -{ - public: - typedef TensorContractionInputMapper >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Self; - typedef TensorContractionSubMapper >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper; - typedef SubMapper VectorMapper; - typedef SubMapper LinearMapper; - typedef Scalar_ Scalar; - typedef typename packet_traits::type Packet; - - TensorContractionInputMapper(const TensorEvaluator >, Device>& tensor, - const nocontract_t&, const nocontract_t&, - const contract_t&, const contract_t&) - : m_impl(tensor.impl().impl()) - { - Index patch_rows; - Index patch_depth; - if (internal::traits::Layout == ColMajor) { - patch_depth = tensor.impl().dimensions()[0]; - patch_rows = tensor.impl().dimensions()[1]; - m_patch_cols = tensor.impl().dimensions()[2]; - m_num_patches = tensor.impl().dimensions()[3]; - } else { - static const int NumDims = tensor.impl().dimensions().size(); - patch_depth = tensor.impl().dimensions()[NumDims - 1]; - patch_rows = tensor.impl().dimensions()[NumDims - 2]; - m_patch_cols = tensor.impl().dimensions()[NumDims - 3]; - m_num_patches = tensor.impl().dimensions()[NumDims - 4]; - } - m_patch_row_inflate_strides = tensor.impl().rowInflateStride(); - m_patch_col_inflate_strides = tensor.impl().colInflateStride(); - - m_colStride = patch_rows; - - m_outputRows = tensor.impl().outputRows(); - m_row_strides = tensor.impl().userRowStride(); - m_col_strides = tensor.impl().userColStride(); - - m_in_row_strides = tensor.impl().userInRowStride(); - m_in_col_strides = tensor.impl().userInColStride(); - - if (internal::traits::Layout == ColMajor) { - m_inputRows = tensor.impl().impl().dimensions()[1]; - m_inputCols = tensor.impl().impl().dimensions()[2]; - } else { - static const int NumDims = tensor.impl().impl().dimensions().size(); - m_inputRows = tensor.impl().impl().dimensions()[NumDims - 2]; - m_inputCols = tensor.impl().impl().dimensions()[NumDims - 3]; - } - - m_rowInputStride = patch_depth; - m_colInputStride = patch_depth * m_inputRows; - m_patchInputStride = patch_depth * m_inputRows * m_inputCols; - - m_rowPaddingTop = tensor.impl().rowPaddingTop(); - m_colPaddingLeft = tensor.impl().colPaddingLeft(); - - m_fastInputRowStride = internal::TensorIntDivisor(m_patch_row_inflate_strides); - m_fastInputColStride = internal::TensorIntDivisor(m_patch_col_inflate_strides); - m_fastNumPatches = internal::TensorIntDivisor(m_num_patches); - m_fastColStride = internal::TensorIntDivisor(m_colStride); - m_fastOutputRows = internal::TensorIntDivisor(m_outputRows); - m_fastDimZero = internal::TensorIntDivisor(patch_depth); - } - - TensorContractionInputMapper(const TensorContractionInputMapper& base_mapper) : - m_impl(base_mapper.m_impl) { - m_patch_cols = base_mapper.m_patch_cols; - m_num_patches = base_mapper.m_num_patches; - m_patch_row_inflate_strides = base_mapper.m_patch_row_inflate_strides; - m_patch_col_inflate_strides = base_mapper.m_patch_col_inflate_strides; - - m_colStride = base_mapper.m_colStride; - - m_rowInputStride = base_mapper.m_rowInputStride; - m_colInputStride = base_mapper.m_colInputStride; - m_patchInputStride = base_mapper.m_patchInputStride; - - m_inputRows = base_mapper.m_inputRows; - m_inputCols = base_mapper.m_inputCols; - - m_outputRows = base_mapper.m_outputRows; - m_row_strides = 
base_mapper.m_row_strides; - m_col_strides = base_mapper.m_col_strides; - - m_in_row_strides = base_mapper.m_in_row_strides; - m_in_col_strides = base_mapper.m_in_col_strides; - - m_rowPaddingTop = base_mapper.m_rowPaddingTop; - m_colPaddingLeft = base_mapper.m_colPaddingLeft; - - m_fastInputRowStride = base_mapper.m_fastInputRowStride; - m_fastInputColStride = base_mapper.m_fastInputColStride; - m_fastNumPatches = base_mapper.m_fastNumPatches; - m_fastColStride = base_mapper.m_fastColStride; - m_fastOutputRows = base_mapper.m_fastOutputRows; - m_fastDimZero = base_mapper.m_fastDimZero; - } - - // If true, turns off some optimizations for loading packets since the image - // patches are "non-standard" such as there are non-trivial strides or - // inflations in the input. - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE bool nonStandardPatches() const { - return m_in_row_strides != 1 || m_in_col_strides != 1 || m_patch_row_inflate_strides != 1 || m_patch_col_inflate_strides != 1; - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const { - return SubMapper(*this, i, j); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE LinearMapper getLinearMapper(Index i, Index j) const { - return LinearMapper(*this, i, j); - } - - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE Scalar operator()(Index row) const { - Index rowIndex, colIndex, otherIndex; - computeBaseIndices(0, rowIndex, colIndex, otherIndex); - return loadCoeff(row, rowIndex, colIndex, otherIndex); - } - - // Load the coefficient at the patchIndex location instead of the usual m_rowIndex, - // m_colIndex, m_otherIndex. This is currently only used by the gpu code. EIGEN_DEVICE_FUNC - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar operator()(Index row, Index patchIndex) const { - Index rowIndex, colIndex, otherIndex; - computeBaseIndices(patchIndex, rowIndex, colIndex, otherIndex); - return loadCoeff(row, rowIndex, colIndex, otherIndex); - } - - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE Packet loadPacket(Index row) const { - Index rowIndex, colIndex, otherIndex; - computeBaseIndices(0, rowIndex, colIndex, otherIndex); - return loadPacket(row, rowIndex, colIndex, otherIndex); - } - - // Load the packet at the patchIndex location instead of the usual m_rowIndex, - // m_colIndex, m_otherIndex. This is currently only used by the gpu code. 
- EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE Packet loadPacket(Index row, Index patchIndex) const { - Index rowIndex, colIndex, otherIndex; - computeBaseIndices(patchIndex, rowIndex, colIndex, otherIndex); - return loadPacket(row, rowIndex, colIndex, otherIndex); - } - - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE const TensorEvaluator& impl() const { return m_impl; } - - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE Index patchDepth() const { return m_rowInputStride; } - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE Index patchRows() const { return m_colStride; } - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE Index patchCols() const { return m_patch_cols; } - - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE Packet packetNoPadding(const Index depth, const Index baseIndex) const { - const Index inputIndex = depth + baseIndex; - return m_impl.template packet(inputIndex); - } - - private: - friend class TensorContractionSubMapper >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>; - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar loadCoeff(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const { - // Find the offset of the element wrt the location of the first element. - const Index patchOffset = patchId / m_fastDimZero; - - const Index colOffset = patchOffset / m_fastColStride; - const Index inputCol = colIndex + colOffset * m_in_col_strides; - const Index origInputCol = (m_patch_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0); - const Index rowOffset = patchOffset - colOffset * m_colStride; - const Index inputRow = rowIndex + rowOffset * m_in_row_strides; - const Index origInputRow = (m_patch_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0); - if (origInputCol < 0 | origInputRow < 0 | origInputCol >= m_inputCols | origInputRow >= m_inputRows | - (inputCol != origInputCol * m_patch_col_inflate_strides) | (inputRow != origInputRow * m_patch_row_inflate_strides)) { - return Scalar(0); - } - const Index depth = patchId - patchOffset * patchDepth(); - const Index inputIndex = depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex; - return m_impl.coeff(inputIndex); - } - - EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar loadCoeffStandard(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const { - eigen_assert(!nonStandardPatches()); - - // Find the offset of the element wrt the location of the first element. 
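    // patchId encodes (depth, row-in-patch, col-in-patch) with depth varying
    // fastest: dividing by the patch depth gives the spatial offset, which is
    // then split into a column offset and the remaining row offset within the patch.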
- const Index patchOffset = patchId / m_fastDimZero; - - const Index colOffset = patchOffset / m_fastColStride; - const Index inputCol = colIndex + colOffset; - const Index rowOffset = patchOffset - colOffset * m_colStride; - const Index inputRow = rowIndex + rowOffset; - if (inputCol < 0 || inputCol >= m_inputCols || inputRow < 0 || inputRow >= m_inputRows) { - return Scalar(0); - } - const Index depth = patchId - patchOffset * patchDepth(); - const Index inputIndex = depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex; - return m_impl.coeff(inputIndex); - } - - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE Packet loadPacket(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const { - const Index packetSize = internal::unpacket_traits::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(patchId < patchDepth()*patchRows()*m_patch_cols); - - if (nonStandardPatches()) { - return packetWithPossibleZero(patchId, rowIndex, colIndex, otherIndex); - } - return loadPacketStandard(patchId, rowIndex, colIndex, otherIndex); - } - - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE Packet loadPacketStandard(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const { - const Index packetSize = internal::unpacket_traits::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(patchId < patchDepth()*patchRows()*m_patch_cols); - - eigen_assert(!nonStandardPatches()); - - if ((patchDepth() % packetSize) == 0) { - return loadPacketFast(patchId, rowIndex, colIndex, otherIndex); - } - else { - const Index patchOffsets[2] = {patchId / m_fastDimZero, (patchId + packetSize - 1) / m_fastDimZero}; - - const Index colOffsets[2] = {patchOffsets[0] / m_fastColStride, patchOffsets[1] / m_fastColStride}; - - const Index inputCols[2] = {colIndex + colOffsets[0], colIndex + colOffsets[1]}; - if (inputCols[0] >= m_inputCols | inputCols[1] < 0) { - // all zeros - return internal::pset1(Scalar(0)); - } - - if (inputCols[0] == inputCols[1]) { - const Index rowOffsets[2] = {patchOffsets[0] - colOffsets[0]*m_colStride, patchOffsets[1] - colOffsets[1]*m_colStride}; - eigen_assert(rowOffsets[0] <= rowOffsets[1]); - const Index inputRows[2] = {rowIndex + rowOffsets[0], rowIndex + rowOffsets[1]}; - - if (inputRows[0] >= m_inputRows | inputRows[1] < 0) { - // all zeros - return internal::pset1(Scalar(0)); - } - - if (inputRows[0] >= 0 & inputRows[1] < m_inputRows) { - // no padding - const Index depth = patchId - patchOffsets[0] * patchDepth(); - const Index inputIndex = depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex; - return m_impl.template packet(inputIndex); - } - } - } - return packetWithPossibleZero(patchId, rowIndex, colIndex, otherIndex); - } - - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE Packet loadPacketFast(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const { - const Index packetSize = internal::unpacket_traits::size; - EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE) - eigen_assert(patchId < patchDepth()*patchRows()*m_patch_cols); - - eigen_assert(!nonStandardPatches()); - eigen_assert((patchDepth() % packetSize) == 0); - // Find the offset of the element wrt the location of the first element. 
- const Index patchOffset = patchId / m_fastDimZero; - eigen_assert((patchId + packetSize - 1) / m_fastDimZero == patchOffset); - - const Index colOffset = patchOffset / m_fastColStride; - const Index inputCol = colIndex + colOffset; - const Index rowOffset = patchOffset - colOffset*m_colStride; - const Index inputRow = rowIndex + rowOffset; - if (inputCol < 0 | inputRow < 0 | inputCol >= m_inputCols | inputRow >= m_inputRows) { - // all zeros - return internal::pset1(Scalar(0)); - } - // no padding - const Index depth = patchId - patchOffset * patchDepth(); - const Index inputIndex = depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex; - return m_impl.template packet(inputIndex); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet packetWithPossibleZero(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const - { - const int packetSize = internal::unpacket_traits::size; - EIGEN_ALIGN_MAX typename internal::remove_const::type values[packetSize]; - for (int i = 0; i < packetSize; ++i) { - values[i] = loadCoeff(patchId+i, rowIndex, colIndex, otherIndex); - } - Packet rslt = internal::pload(values); - return rslt; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void computeBaseIndices(Index patchIndex, Index& rowIndex, Index& colIndex, Index& otherIndex) const { - const int NumInputDims = array_size::Dimensions>::value; - otherIndex = (NumInputDims == 3) ? 0 : patchIndex / m_fastNumPatches; - const Index patch2DIndex = (NumInputDims == 3) ? patchIndex : (patchIndex - otherIndex * m_num_patches); - otherIndex *= m_patchInputStride; - colIndex = patch2DIndex / m_fastOutputRows; - rowIndex = patch2DIndex - colIndex * m_outputRows; - colIndex = colIndex * m_col_strides - m_colPaddingLeft; - rowIndex = rowIndex * m_row_strides - m_rowPaddingTop; - } - - Index m_patch_cols; // number of colums in the patch - Index m_num_patches; // number of patches to extract. - Index m_patch_row_inflate_strides; // the strides for row inflation in the image patch - Index m_patch_col_inflate_strides; // the strides for col inflation in the image patch - // Fast representation of inflation strides. 
- internal::TensorIntDivisor m_fastInputRowStride; - internal::TensorIntDivisor m_fastInputColStride; - - Index m_otherStride; - Index m_colStride; - internal::TensorIntDivisor m_fastNumPatches; - internal::TensorIntDivisor m_fastColStride; - - Index m_rowInputStride; // row stride in the input tensor - Index m_colInputStride; // col stride in the input tensor - Index m_patchInputStride; // patch stride in the input tensor - - Index m_inputRows; // Number of rows in the input tensor - Index m_inputCols; // Number of cols in the input tensor - - Index m_outputRows; // Number of patch rows - - Index m_row_strides; // User specified row stride - Index m_col_strides; // User specified col stride - - Index m_in_row_strides; // User specified input row stride - Index m_in_col_strides; // User specified input col stride - - Index m_rowPaddingTop; // Row padding - Index m_colPaddingLeft; // Column padding - - internal::TensorIntDivisor m_fastOutputRows; - internal::TensorIntDivisor m_fastDimZero; - - const TensorEvaluator m_impl; -}; - - -template -class TensorContractionSubMapper >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> -{ - public: - typedef Scalar_ Scalar; - typedef typename packet_traits::type Packet; - typedef typename packet_traits::half HalfPacket; - - typedef TensorContractionInputMapper >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> ParentMapper; - typedef TensorContractionSubMapper >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Self; - typedef Self LinearMapper; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset) - : m_base_mapper(base_mapper), m_depth_offset(vert_offset), m_col_offset(horiz_offset) { - m_base_mapper.computeBaseIndices(m_col_offset, m_rowIndex, m_colIndex, m_otherIndex); - } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionSubMapper(const Self& base_mapper, Index vert_offset, Index horiz_offset) - : m_base_mapper(base_mapper.m_base_mapper), m_depth_offset(vert_offset+base_mapper.m_depth_offset), m_col_offset(horiz_offset+base_mapper.m_col_offset) { - m_base_mapper.computeBaseIndices(m_col_offset, m_rowIndex, m_colIndex, m_otherIndex); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const { - return m_base_mapper.loadCoeff(i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const { - return m_base_mapper(i + m_depth_offset, j + m_col_offset); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const { - return m_base_mapper.loadPacket(i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const { - return m_base_mapper.template loadPacket(i + m_depth_offset, j + m_col_offset); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar loadCoeffStandard(Index i) const { - return m_base_mapper.loadCoeffStandard(i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex); - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacketFast(Index i) const { - return m_base_mapper.loadPacketFast(i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacketStandard(Index i) const { - return m_base_mapper.loadPacketStandard(i + m_depth_offset, 
m_rowIndex, m_colIndex, m_otherIndex); - } - template - EIGEN_DEVICE_FUNC bool aligned(Index) const { - return false; - } - - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE bool nonStandardPatches() const { - return m_base_mapper.nonStandardPatches(); - } - - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE Index patchDepth() const { return m_base_mapper.m_rowInputStride; } - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE Index patchRows() const { return m_base_mapper.m_colStride; } - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE Index patchCols() const { return m_base_mapper.m_patch_cols; } - - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE Packet packetNoPadding(const Index depth, const Index baseIndex) const { - const Index inputIndex = depth + baseIndex; - return m_base_mapper.m_impl.template packet(inputIndex); - } - - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE bool padRow(const Index row) const { - const Index r = m_rowIndex + row; - return r < 0 | r >= m_base_mapper.m_inputRows; - } - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE bool padCol(const Index col) const { - const Index c = m_colIndex + col; - return c < 0 | c >= m_base_mapper.m_inputCols; - } - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE Index baseIndex(const Index row, const Index col) const { - const Index r = m_rowIndex + row; - const Index c = m_colIndex + col; - return r * m_base_mapper.m_rowInputStride + c * m_base_mapper.m_colInputStride + m_otherIndex; - } - - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE Index rowOffset() const { - const Index patchOffset = m_depth_offset / m_base_mapper.m_fastDimZero; - const Index colOffset = patchOffset / m_base_mapper.m_fastColStride; - return patchOffset-colOffset*m_base_mapper.m_colStride; - } - - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE Index colOffset() const { - const Index patchOffset = m_depth_offset / m_base_mapper.m_fastDimZero; - const Index colOffset = patchOffset / m_base_mapper.m_fastColStride; - return colOffset; - } - - EIGEN_DEVICE_FUNC - EIGEN_ALWAYS_INLINE Index depthOffset() const { - const Index patchOffset = m_depth_offset % m_base_mapper.patchDepth(); - return patchOffset; - } - - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const { - return LinearMapper(m_base_mapper, i + m_depth_offset, j + m_col_offset); - } - - private: - const ParentMapper& m_base_mapper; // that was a reference before - Index m_depth_offset; // First row in the input matrix - Index m_col_offset; // First col in the input matrix - - Index m_rowIndex; // precomputed row index corresponding to the col offset - Index m_colIndex; // precomputed col index corresponding to the col offset - Index m_otherIndex; // precomputed other index corresponding to the col offset - -}; - - -template -struct gemm_pack_rhs >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>, nr, ColMajor, false, false> { - - typedef TensorContractionSubMapper >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper; - typedef SubMapper DataMapper; - - static inline Index ceil_div(Index a, Index b) { - return (a + b - 1) / b; - } - - EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0) const { - eigen_assert(stride == 0); - eigen_assert(offset == 0); - - EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE); - typedef typename DataMapper::LinearMapper LinearMapper; - typedef typename packet_traits::type Packet; - - const Index packet_cols4 = 
(cols/4) * 4; - const Index peeled_k = (depth/packet_size) * packet_size; - const bool non_standard_patches = rhs.nonStandardPatches(); - - for(Index j2=0; j2(ceil_div(peeled_k, patch_rows*patch_depth)+startCol, patch_cols); - - for (Index c = startCol; c < max_cols; ++c) { - eigen_assert(k < peeled_k); - const Index startRow = (c == startCol) ? rhs.rowOffset() : 0; - const Index max_rows = std::min(ceil_div(peeled_k-c*patch_rows*patch_depth, patch_depth)+startRow, patch_rows); - - const bool pad_col0 = dm0.padCol(c); - const bool pad_col1 = dm1.padCol(c); - const bool pad_col2 = dm2.padCol(c); - const bool pad_col3 = dm3.padCol(c); - for (Index r = startRow; r < max_rows; ++r) { - eigen_assert(k < peeled_k); - const bool pad0 = pad_col0 || dm0.padRow(r); - const bool pad1 = pad_col1 || dm1.padRow(r); - const bool pad2 = pad_col2 || dm2.padRow(r); - const bool pad3 = pad_col3 || dm3.padRow(r); - - const Index idx0 = dm0.baseIndex(r, c); - const Index idx1 = dm1.baseIndex(r, c); - const Index idx2 = dm2.baseIndex(r, c); - const Index idx3 = dm3.baseIndex(r, c); - - const Index startDepth = ((c == startCol) && (r == startRow)) ? rhs.depthOffset() : 0; - const Index max_depth = std::min(peeled_k-c*patch_rows*patch_depth-r*patch_depth+startDepth, patch_depth); - eigen_assert(max_depth % packet_size == 0); - for (Index d = startDepth; d < max_depth; d += packet_size) { - eigen_assert(k < peeled_k); - PacketBlock kernel; - kernel.packet[0] = pad0 ? pset1(0) : rhs.packetNoPadding(d, idx0); - kernel.packet[1] = pad1 ? pset1(0) : rhs.packetNoPadding(d, idx1); - kernel.packet[2] = pad2 ? pset1(0) : rhs.packetNoPadding(d, idx2); - kernel.packet[3] = pad3 ? pset1(0) : rhs.packetNoPadding(d, idx3); - ptranspose(kernel); - pstoreu(block+0*packet_size, kernel.packet[0]); - pstoreu(block+1*packet_size, kernel.packet[1]); - pstoreu(block+2*packet_size, kernel.packet[2]); - pstoreu(block+3*packet_size, kernel.packet[3]); - block+=4*packet_size; - k += packet_size; - } - } - } - - for(; k kernel; - kernel.packet[0] = dm0.loadPacketFast(k); - kernel.packet[1] = dm1.loadPacketFast(k); - kernel.packet[2] = dm2.loadPacketFast(k); - kernel.packet[3] = dm3.loadPacketFast(k); - ptranspose(kernel); - pstoreu(block+0*packet_size, kernel.packet[0]); - pstoreu(block+1*packet_size, kernel.packet[1]); - pstoreu(block+2*packet_size, kernel.packet[2]); - pstoreu(block+3*packet_size, kernel.packet[3]); - block+=4*packet_size; - } - } - else { - for(; k kernel; - kernel.packet[0] = dm0.loadPacketStandard(k); - kernel.packet[1] = dm1.loadPacketStandard(k); - kernel.packet[2] = dm2.loadPacketStandard(k); - kernel.packet[3] = dm3.loadPacketStandard(k); - ptranspose(kernel); - pstoreu(block+0*packet_size, kernel.packet[0]); - pstoreu(block+1*packet_size, kernel.packet[1]); - pstoreu(block+2*packet_size, kernel.packet[2]); - pstoreu(block+3*packet_size, kernel.packet[3]); - block+=4*packet_size; - } - } - } - if (!rhs.nonStandardPatches()) { - for(; k 1, then applies convolution with holes (aka atrous convolution), sampling every in_stride input pixels. - * - * The result can be assigned to a tensor of rank equal to the rank of the input. The dimensions of the result will be filters, height, width (and others if applicable). - * - * It is possible to swap the order of the width and height dimensions provided that the same order is used in the input, the kernel, and the output. 
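 *
 * For illustration only (hypothetical sizes, col-major layout; not taken from
 * the original sources):
 *
 * \code
 * // input: channels x height x width x batch
 * Eigen::Tensor<float, 4> input(3, 32, 32, 8);
 * // kernel: filters x channels x kernel_height x kernel_width
 * Eigen::Tensor<float, 4> kernel(16, 3, 5, 5);
 * // The default stride of 1 and PADDING_SAME keep the spatial dimensions unchanged.
 * Eigen::Tensor<float, 4> output(16, 32, 32, 8);
 * output = Eigen::SpatialConvolution(input, kernel);
 * \endcode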
- * - */ -template -EIGEN_ALWAYS_INLINE -static const typename internal::conditional< - internal::traits::Layout == ColMajor, - TensorReshapingOp::Index, internal::traits::NumDimensions>, const TensorContractionOp::Index>, 1>, const TensorReshapingOp::Index, 2>, const Kernel>, const TensorReshapingOp::Index, 2>, const TensorImagePatchOp > > >, - TensorReshapingOp::Index, internal::traits::NumDimensions>, const TensorContractionOp::Index>, 1>, const TensorReshapingOp::Index, 2>, const TensorImagePatchOp >, const TensorReshapingOp::Index, 2>, const Kernel> > > >::type -SpatialConvolution(const Input& input, const Kernel& kernel, const DenseIndex stride = 1, const PaddingType padding_type = PADDING_SAME, const DenseIndex in_stride = 1) { - - typedef typename internal::traits::Index TensorIndex; - TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > in(input); - TensorRef::Scalar, internal::traits::NumDimensions, internal::traits::Layout, TensorIndex> > kern(kernel); - - EIGEN_STATIC_ASSERT(internal::traits::Layout == internal::traits::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE); - static const bool isColMajor = (internal::traits::Layout == ColMajor); - - static const int NumDims = internal::traits::NumDimensions; - - // Number of filters to apply. This is the same as the output depth of the result - const TensorIndex kernelFilters = isColMajor ? kern.dimensions()[0] : kern.dimensions()[3]; - // Number of channels. This is the same as the input depth. - const TensorIndex kernelChannels = isColMajor ? kern.dimensions()[1] : kern.dimensions()[2]; - const TensorIndex kernelRows = isColMajor ? kern.dimensions()[2] : kern.dimensions()[1]; - const TensorIndex kernelCols = isColMajor ? kern.dimensions()[3] : kern.dimensions()[0]; - - const DenseIndex kernelRowsEff = kernelRows + (kernelRows - 1) * (in_stride - 1); - const DenseIndex kernelColsEff = kernelCols + (kernelCols - 1) * (in_stride - 1); - - array, 1> contract_dims; - contract_dims[0] = IndexPair(1, 0); - - const TensorIndex InputRows = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2); - const TensorIndex InputCols = isColMajor ? 
in.dimension(2) : in.dimension(NumDims - 3); - - TensorIndex out_height; - TensorIndex out_width; - switch (padding_type) { - case PADDING_VALID: - out_height = numext::ceil((InputRows - kernelRowsEff + 1.f) / static_cast(stride)); - out_width = numext::ceil((InputCols - kernelColsEff + 1.f) / static_cast(stride)); - break; - case PADDING_SAME: - out_height = numext::ceil(InputRows / static_cast(stride)); - out_width = numext::ceil(InputCols / static_cast(stride)); - break; - default: - eigen_assert(false && "unexpected padding"); - } - - // Molds the output of the patch extraction code into a 2d tensor: - // - the first dimension (dims[0]): the patch values to be multiplied with the kernels - // - the second dimension (dims[1]): everything else - DSizes pre_contract_dims; - if (isColMajor) { - pre_contract_dims[0] = kernelChannels * kernelRows * kernelCols; - pre_contract_dims[1] = out_height * out_width; - for (int i = 3; i < NumDims; ++i) { - pre_contract_dims[1] *= in.dimension(i); - } - } else { - pre_contract_dims[1] = kernelChannels * kernelRows * kernelCols; - pre_contract_dims[0] = out_height * out_width; - for (int i = 0; i < NumDims - 3; ++i) { - pre_contract_dims[0] *= in.dimension(i); - } - } - - // Molds the output of the contraction into the shape expected by the used - // (assuming this is ColMajor): - // - 1st dim: kernel filters - // - 2nd dim: output height - // - 3rd dim: output width - // - 4th dim and beyond: everything else including batch size - DSizes post_contract_dims; - if (isColMajor) { - post_contract_dims[0] = kernelFilters; - post_contract_dims[1] = out_height; - post_contract_dims[2] = out_width; - for (int i = 3; i < NumDims; ++i) { - post_contract_dims[i] = in.dimension(i); - } - } else { - post_contract_dims[NumDims - 1] = kernelFilters; - post_contract_dims[NumDims - 2] = out_height; - post_contract_dims[NumDims - 3] = out_width; - for (int i = 0; i < NumDims - 3; ++i) { - post_contract_dims[i] = in.dimension(i); - } - } - - DSizes kernel_dims; - if (isColMajor) { - kernel_dims[0] = kernelFilters; - kernel_dims[1] = kernelChannels * kernelRows * kernelCols; - } else { - kernel_dims[0] = kernelChannels * kernelRows * kernelCols; - kernel_dims[1] = kernelFilters; - } - // TODO(yangke): choose() is defined in TensorContraction.h -- consider - // moving it to somewhere more "common". - return choose(Cond::Layout == ColMajor>(), - kernel.reshape(kernel_dims).contract(input.extract_image_patches(kernelRows, kernelCols, stride, stride, in_stride, in_stride, padding_type).reshape(pre_contract_dims), contract_dims).reshape(post_contract_dims), - input.extract_image_patches(kernelRows, kernelCols, stride, stride, in_stride, in_stride, padding_type).reshape(pre_contract_dims).contract(kernel.reshape(kernel_dims), contract_dims).reshape(post_contract_dims)); -} - -} // end namespace Eigen - -#endif // EIGEN_CXX11_NEURAL_NETWORKS_SPATIAL_CONVOLUTIONS_H diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/TensorConvolutionByFFT.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/TensorConvolutionByFFT.h deleted file mode 100644 index 0e72173536..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/TensorConvolutionByFFT.h +++ /dev/null @@ -1,289 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. 
-// -// Copyright (C) 2014 Benoit Steiner -// Copyright (C) 2015 Jianwei Cui -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTIONBYFFT_H -#define EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTIONBYFFT_H - -namespace Eigen { - -/** \class TensorConvolutionByFFT - * \ingroup CXX11_Tensor_Module - * - * \brief Tensor convolution class. - * - * - */ -namespace internal { - - -template -struct traits > -{ - // Type promotion to handle the case where the types of the lhs and the rhs are different. - typedef typename promote_storage_type::ret Scalar; - typedef typename packet_traits::type Packet; - typedef typename promote_storage_type::StorageKind, - typename traits::StorageKind>::ret StorageKind; - typedef typename promote_index_type::Index, - typename traits::Index>::type Index; - typedef typename InputXprType::Nested LhsNested; - typedef typename KernelXprType::Nested RhsNested; - typedef typename remove_reference::type _LhsNested; - typedef typename remove_reference::type _RhsNested; - static const int NumDimensions = traits::NumDimensions; - static const int Layout = traits::Layout; - - enum { - Flags = 0, - }; -}; - -template -struct eval, Eigen::Dense> -{ - typedef const TensorConvolutionByFFTOp& type; -}; - -template -struct nested, 1, typename eval >::type> -{ - typedef TensorConvolutionByFFTOp type; -}; - -} // end namespace internal - - - -template -class TensorConvolutionByFFTOp : public TensorBase > -{ - public: - typedef typename Eigen::internal::traits::Scalar Scalar; - typedef typename Eigen::internal::traits::Packet Packet; - typedef typename Eigen::NumTraits::Real RealScalar; - typedef typename internal::promote_storage_type::ret CoeffReturnType; - typedef typename internal::promote_storage_type::ret PacketReturnType; - typedef typename Eigen::internal::nested::type Nested; - typedef typename Eigen::internal::traits::StorageKind StorageKind; - typedef typename Eigen::internal::traits::Index Index; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConvolutionByFFTOp(const InputXprType& input, const KernelXprType& kernel, const Indices& dims) - : m_input_xpr(input), m_kernel_xpr(kernel), m_indices(dims) {} - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const Indices& indices() const { return m_indices; } - - /** \returns the nested expressions */ - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const typename internal::remove_all::type& - inputExpression() const { return m_input_xpr; } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE - const typename internal::remove_all::type& - kernelExpression() const { return m_kernel_xpr; } - - protected: - typename InputXprType::Nested m_input_xpr; - typename KernelXprType::Nested m_kernel_xpr; - const Indices m_indices; -}; - - -template -struct TensorEvaluator, Device> -{ - typedef TensorConvolutionByFFTOp XprType; - - typedef typename XprType::Scalar Scalar; - typedef typename XprType::CoeffReturnType CoeffReturnType; - typedef typename XprType::PacketReturnType PacketReturnType; - - typedef typename Eigen::NumTraits::Real RealScalar; - - static const int NumDims = internal::array_size::Dimensions>::value; - static const int NumKernelDims = internal::array_size::value; - typedef typename XprType::Index Index; - typedef DSizes Dimensions; - - enum { - IsAligned = TensorEvaluator::IsAligned & - TensorEvaluator::IsAligned, - PacketAccess = false, - BlockAccess = false, - 
Layout = TensorEvaluator::Layout, - CoordAccess = false, // to be implemented - }; - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_kernel(NULL), m_local_kernel(false), m_device(device) - { - EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); - - const typename TensorEvaluator::Dimensions& input_dims = m_inputImpl.dimensions(); - const typename TensorEvaluator::Dimensions& kernel_dims = m_kernelImpl.dimensions(); - - if (static_cast(Layout) == static_cast(ColMajor)) { - m_inputStride[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_inputStride[i] = m_inputStride[i - 1] * input_dims[i - 1]; - } - } else { - m_inputStride[NumDims - 1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_inputStride[i] = m_inputStride[i + 1] * input_dims[i + 1]; - } - } - - m_dimensions = m_inputImpl.dimensions(); - if (static_cast(Layout) == static_cast(ColMajor)) { - for (int i = 0; i < NumKernelDims; ++i) { - const Index index = op.indices()[i]; - const Index input_dim = input_dims[index]; - const Index kernel_dim = kernel_dims[i]; - const Index result_dim = input_dim - kernel_dim + 1; - m_dimensions[index] = result_dim; - if (i > 0) { - m_kernelStride[i] = m_kernelStride[i - 1] * kernel_dims[i - 1]; - } else { - m_kernelStride[0] = 1; - } - m_indexStride[i] = m_inputStride[index]; - } - - m_outputStride[0] = 1; - for (int i = 1; i < NumDims; ++i) { - m_outputStride[i] = m_outputStride[i - 1] * m_dimensions[i - 1]; - } - } else { - for (int i = NumKernelDims - 1; i >= 0; --i) { - const Index index = op.indices()[i]; - const Index input_dim = input_dims[index]; - const Index kernel_dim = kernel_dims[i]; - const Index result_dim = input_dim - kernel_dim + 1; - m_dimensions[index] = result_dim; - if (i < NumKernelDims - 1) { - m_kernelStride[i] = m_kernelStride[i + 1] * kernel_dims[i + 1]; - } else { - m_kernelStride[NumKernelDims - 1] = 1; - } - m_indexStride[i] = m_inputStride[index]; - } - - m_outputStride[NumDims - 1] = 1; - for (int i = NumDims - 2; i >= 0; --i) { - m_outputStride[i] = m_outputStride[i + 1] * m_dimensions[i + 1]; - } - } - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; } - - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) { - m_inputImpl.evalSubExprsIfNeeded(NULL); - m_kernelImpl.evalSubExprsIfNeeded(NULL); - - typedef typename internal::traits::Index TensorIndex; - - Tensor input(m_inputImpl.dimensions()); - for (int i = 0; i < m_inputImpl.dimensions().TotalSize(); ++i) { - input.data()[i] = m_inputImpl.coeff(i); - } - - Tensor kernel(m_kernelImpl.dimensions()); - for (int i = 0; i < m_kernelImpl.dimensions().TotalSize(); ++i) { - kernel.data()[i] = m_kernelImpl.coeff(i); - } - - array, NumDims> paddings; - for (int i = 0; i < NumDims; ++i) { - paddings[i] = std::make_pair(0, m_inputImpl.dimensions()[i] - m_kernelImpl.dimensions()[i]); - } - - Eigen::array reverse; - for (int i = 0; i < NumKernelDims; ++i) { - reverse[i] = true; - } - - Eigen::array fft; - for (int i = 0; i < NumDims; ++i) { - fft[i] = i; - } - - Eigen::DSizes slice_offsets; - for (int i = 0; i < NumDims; ++i) { - slice_offsets[i] = m_kernelImpl.dimensions()[i] - 1; - } - - Eigen::DSizes slice_extents; - for (int i = 0; i < NumDims; ++i) { - slice_extents[i] = 
m_inputImpl.dimensions()[i] - m_kernelImpl.dimensions()[i] + 1; - } - - Tensor kernel_variant = kernel.reverse(reverse).pad(paddings); - Tensor, NumDims, Layout, TensorIndex> kernel_fft = kernel_variant.template fft(fft); - //Tensor, NumDims, Layout|IndexType> kernel_fft = kernel.reverse(reverse).pad(paddings).template fft<2>(fft); - Tensor, NumDims, Layout, TensorIndex> input_fft = input.template fft(fft); - Tensor, NumDims, Layout, TensorIndex> prod = (input_fft * kernel_fft).template fft(fft); - Tensor, NumDims, Layout, TensorIndex> tensor_result = prod.slice(slice_offsets, slice_extents); - - for (int i = 0; i < tensor_result.size(); ++i) { - data[i] = std::real(tensor_result.data()[i]); - } - return false; - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { - m_inputImpl.cleanup(); - if (m_local_kernel) { - m_device.deallocate((void*)m_kernel); - m_local_kernel = false; - } - m_kernel = NULL; - } - - void evalTo(typename XprType::Scalar* buffer) { - evalSubExprsIfNeeded(NULL); - for (int i = 0; i < dimensions().TotalSize(); ++i) { - buffer[i] += coeff(i); - } - cleanup(); - } - - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const - { - CoeffReturnType result = CoeffReturnType(0); - return result; - } - - EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } - - private: - array m_inputStride; - array m_outputStride; - - array m_indexStride; - array m_kernelStride; - TensorEvaluator m_inputImpl; - TensorEvaluator m_kernelImpl; - Dimensions m_dimensions; - - KernelArgType m_kernelArg; - const Scalar* m_kernel; - bool m_local_kernel; - const Device& m_device; -}; - -} // end namespace Eigen - -#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTIONBYFFT_H -- cgit v1.2.3 From 403845d3e26291d6013c623b9130f4404c969ca6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 26 Jul 2018 16:27:07 -0700 Subject: Remove tensorflow/opensource_only/Core as it is not used by tensorflow. PiperOrigin-RevId: 206240947 --- third_party/eigen3/unsupported/Eigen/CXX11/Core | 46 ------------------------- 1 file changed, 46 deletions(-) delete mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/Core (limited to 'third_party') diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/Core b/third_party/eigen3/unsupported/Eigen/CXX11/Core deleted file mode 100644 index 1b3690716c..0000000000 --- a/third_party/eigen3/unsupported/Eigen/CXX11/Core +++ /dev/null @@ -1,46 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2013 Christian Seiler -// Copyright (C) 2014 Benoit Steiner -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#ifndef EIGEN_CXX11_CORE_MODULE -#define EIGEN_CXX11_CORE_MODULE - -#include - -#include - -/** \defgroup CXX11_Core_Module C++11 Core Module - * - * This module provides common core features for all modules that - * explicitly depend on C++11. Currently, this is only the Tensor - * module. Note that at this stage, you should not need to include - * this module directly. - * - * It also provides a limited fallback for compilers that don't support - * CXX11 yet, such as nvcc. - * - * \code - * #include - * \endcode - */ - -// Only a subset of cxx11 is allowed at Google, so we default to emulate the -// cxx11 functionality that we need. 
-#include "src/Core/util/FixedSizeVector.h" -#if 1 -#include -#include "src/Core/util/EmulateCXX11Meta.h" -#else -#include "src/Core/util/CXX11Workarounds.h" -#include "src/Core/util/CXX11Meta.h" -#endif -#include - -#endif // EIGEN_CXX11_CORE_MODULE - -- cgit v1.2.3 From a9911cb06b931be7207ac2938dfffe9db3313e3c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 27 Jul 2018 10:26:19 -0700 Subject: Bug fixes and 16 bit matmul added in CXX11/src/FixedPoint. PiperOrigin-RevId: 206335619 --- .../Eigen/CXX11/src/FixedPoint/FixedPointTypes.h | 6 +- .../Eigen/CXX11/src/FixedPoint/MatMatProduct.h | 86 +++- .../Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h | 482 ++++++++++++++++++++- .../Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h | 9 +- .../Eigen/CXX11/src/FixedPoint/MatVecProduct.h | 39 +- .../Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h | 8 +- .../Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h | 6 +- .../Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h | 16 +- .../Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h | 53 ++- 9 files changed, 659 insertions(+), 46 deletions(-) (limited to 'third_party') diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h index 6b625abc3e..5ab3664918 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h @@ -7,8 +7,8 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef EIGEN_CXX11_FIXED_POINT_TYPES_H -#define EIGEN_CXX11_FIXED_POINT_TYPES_H +#ifndef CXX11_SRC_FIXEDPOINT_FIXEDPOINTTYPES_H_ +#define CXX11_SRC_FIXEDPOINT_FIXEDPOINTTYPES_H_ #include #include @@ -339,4 +339,4 @@ EIGEN_STRONG_INLINE std::ostream& operator<<(std::ostream& os, QInt32 a) { } // namespace Eigen -#endif // EIGEN_CXX11_FIXED_POINT_TYPES_H +#endif // CXX11_SRC_FIXEDPOINT_FIXEDPOINTTYPES_H_ diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProduct.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProduct.h index 4d0dca07df..e6f4080ae1 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProduct.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProduct.h @@ -7,9 +7,8 @@ // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
-#ifndef EIGEN_CXX11_FIXED_POINT_MAT_MAT_PRODUCT_H -#define EIGEN_CXX11_FIXED_POINT_MAT_MAT_PRODUCT_H - +#ifndef CXX11_SRC_FIXEDPOINT_MATMATPRODUCT_H_ +#define CXX11_SRC_FIXEDPOINT_MATMATPRODUCT_H_ namespace Eigen { namespace internal { @@ -24,6 +23,14 @@ template<> struct scalar_product_traits typedef QInt32 ReturnType; }; +// Accumulate the product of 2 QInt16 inputs on 32 bits to prevent +// overflows +template <> +struct scalar_product_traits { + enum { Defined = 1 }; + typedef QInt32 ReturnType; +}; + // Accumulate the product of QInt8 inputs with QUint8 inputs on 32 bits // to prevent overflows template<> struct scalar_product_traits @@ -247,9 +254,76 @@ void gebp_kernel +class gebp_traits { + public: + typedef QInt16 LhsScalar; + typedef QInt16 RhsScalar; + typedef QInt32 ResScalar; + + enum { + // register block size along the M and N directions + // One for the current implementation + nr = 1, + mr = 1, + // Progress made at each iteration of the product loop + // also 1 for the current implementation + LhsProgress = 1, + RhsProgress = 1 + }; +}; + +// The signed 16bit Mat-Mat product itself. +template +struct gebp_kernel { + EIGEN_DONT_INLINE + void operator()(const DataMapper& res, const QInt16* blockA, + const QInt16* blockB, Index rows, Index depth, Index cols, + QInt32 alpha, Index strideA = -1, Index strideB = -1, + Index offsetA = 0, Index offsetB = 0); +}; + +template +EIGEN_DONT_INLINE void gebp_kernel:: +operator()(const DataMapper& res, const QInt16* blockA, const QInt16* blockB, + Index rows, Index depth, Index cols, QInt32 alpha, Index strideA, + Index strideB, Index offsetA, Index offsetB) { + EIGEN_STATIC_ASSERT(!ConjugateLhs, YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT(!ConjugateRhs, YOU_MADE_A_PROGRAMMING_MISTAKE); + eigen_assert(alpha.value == 1); + eigen_assert(strideA == -1); + eigen_assert(strideB == -1); + eigen_assert(offsetA == 0); + eigen_assert(offsetB == 0); + + eigen_assert(rows > 0); + eigen_assert(cols > 0); + eigen_assert(depth > 0); + eigen_assert(blockA); + eigen_assert(blockB); + + for (Index j = 0; j < cols; ++j) { + Index startB = j * depth; + for (Index i = 0; i < rows; ++i) { + Index startA = i * depth; + + for (Index k = 0; k < depth; ++k) { + res(i, j) += blockA[startA + k] * blockB[startB + k]; + } + } + } +} +#endif + +} // namespace internal +} // namespace Eigen -#endif // EIGEN_CXX11_FIXED_POINT_MAT_MAT_PRODUCT_H +#endif // CXX11_SRC_FIXEDPOINT_MATMATPRODUCT_H_ diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h index 6b4b0edcfb..66532fb600 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h @@ -3,17 +3,493 @@ // // Copyright (C) 2015 Benoit Steiner // Copyright (C) 2015 Matthew Sarett +// Copyright (C) 2016 Nishant Patil // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. -#ifndef EIGEN_CXX11_FIXED_POINT_MAT_MAT_PRODUCT_AVX2_H -#define EIGEN_CXX11_FIXED_POINT_MAT_MAT_PRODUCT_AVX2_H +#ifndef CXX11_SRC_FIXEDPOINT_MATMATPRODUCTAVX2_H_ +#define CXX11_SRC_FIXEDPOINT_MATMATPRODUCTAVX2_H_ namespace Eigen { namespace internal { +// AVX2 optimized implementation of Mat-Mat product. +// LHS is encoded using signed 16-bit integers. 
+// RHS is encoded using signed 16-bit integers. +#ifdef EIGEN_USE_OPTIMIZED_INT16_INT16_MAT_MAT_PRODUCT + +// Define quantized traits +template +class gebp_traits { + public: + typedef QInt16 LhsScalar; + typedef QInt16 RhsScalar; + typedef QInt32 ResScalar; + + enum { + // Define register blocking scheme. + nr = 16, + mr = 16, + kr = 4, + // Ignore progress tracking per loop iteration. + LhsProgress = -1, + RhsProgress = -1 + }; +}; + +// Specialized blocking for quantized implementations. +// Used by TensorContractionThreadPool, inputs must have dimensions that are +// multiples of 32. +template +class TensorContractionBlocking { + public: + TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1) + : kc_(((k + 15) / 16) * 16), + mc_(((m + 15) / 16) * 16), + nc_(((n + 15) / 16) * 16) { + eigen_assert(mc_ % 16 == 0); + eigen_assert(kc_ % 16 == 0); + if (!k || !m || !n) { + return; + } + + if (ShardingType == ShardByCol) { + eigen_assert(nc_ % 16 == 0); + nc_ = (((nc_ / num_threads) + 15) / 16) * 16; + } else { + eigen_assert(nc_ % 16 == 0); + mc_ = (((mc_ / num_threads) + 15) / 16) * 16; + } + } + + EIGEN_ALWAYS_INLINE Index kc() const { return kc_; } + EIGEN_ALWAYS_INLINE Index mc() const { return mc_; } + EIGEN_ALWAYS_INLINE Index nc() const { return nc_; } + + private: + Index kc_; + Index mc_; + Index nc_; +}; + +// Specialized blocking for quantized implementations. +// Used by TensorContraction and GeneralMatrixMatrix, inputs are padded to +// multiples of 32. +template +class gemm_blocking_space + : public level3_blocking { + DenseIndex m_sizeA; + DenseIndex m_sizeB; + + public: + gemm_blocking_space(DenseIndex rows, DenseIndex cols, DenseIndex depth, + DenseIndex /*num_threads*/, bool /*l3_blocking*/) { + this->m_mc = ((rows + 15) / 16) * 16; + this->m_nc = ((cols + 15) / 16) * 16; + this->m_kc = ((depth + 15) / 16) * 16; + m_sizeA = this->m_mc * this->m_kc; + m_sizeB = this->m_kc * this->m_nc; + } + void allocateA() { + if (this->m_blockA == 0) this->m_blockA = aligned_new(m_sizeA); + } + void allocateB() { + if (this->m_blockB == 0) this->m_blockB = aligned_new(m_sizeB); + } + void allocateAll() { + allocateA(); + allocateB(); + } + ~gemm_blocking_space() { + aligned_delete(this->m_blockA, m_sizeA); + aligned_delete(this->m_blockB, m_sizeB); + } +}; + +// Below are the fully optimized versions that are correct only for sizes that +// are multiple of 16. It is about a 10% performance benefit to keep these +// implementations separate. + +// Arrange a block of the left input matrix in contiguous memory. +// +// Given column major input (A0 beside A1 in memory): +// A0 B0 C0 D0 E0 F0 G0 H0 ... +// A1 B1 C1 D1 E1 F1 G1 H1 ... +// A2 B2 C2 D2 E2 F2 G2 H2 ... +// A3 B3 C3 D3 E3 F3 G3 H3 ... +// A4 B4 C4 D4 E4 F4 G4 H4 ... +// A5 B5 C5 D5 E5 F5 G5 H5 ... +// A6 B6 C6 D6 E6 F6 G6 H6 ... +// A7 B7 C7 D7 E7 F7 G7 H7 ... +// A8 ... +// ... +// +// Packing with m = 8 yields row major output (A0 beside B0 in memory): +// A0 B0 +// A1 B1 +// A2 B2 +// A3 B3 +// A4 B4 +// A5 B5 +// A6 B6 +// A7 B7 +// ... +// +// The purpose is to collect m rows of size k. Two elements of the same +// row are arranged contiguously because madd performs an adjacent addition +// in the kernel. 
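As a reading aid for the vectorized packing that follows, here is a hypothetical scalar model of the layout it produces for one 16-row by 4-depth tile, as I read the intrinsics below (pack_lhs_reference, the raw-pointer interface, and int16_t standing in for QInt16 are assumptions for illustration, not part of this patch). It restates the interleaving described above: rows are handled eight at a time, and each row contributes adjacent depth pairs so a 16-bit madd can consume them directly.

#include <cassert>
#include <cstdint>

// Hypothetical scalar reference of the 16-bit LHS packing (column-major lhs,
// rows and depth assumed to be multiples of 16, as the kernel requires).
void pack_lhs_reference(int16_t* packed, const int16_t* lhs, long rows, long depth) {
  assert(rows % 16 == 0 && depth % 16 == 0);
  auto at = [&](long r, long k) { return lhs[r + k * rows]; };  // column-major access
  for (long m0 = 0; m0 < rows; m0 += 16)                        // tile of 16 rows (mr)
    for (long k0 = 0; k0 < depth; k0 += 4)                      // tile of 4 depth values (kr)
      for (long half = 0; half < 16; half += 8)                 // rows m0..m0+7, then m0+8..m0+15
        for (long kk = 0; kk < 4; kk += 2)                      // depth pairs fed to madd
          for (long r = 0; r < 8; ++r) {
            *packed++ = at(m0 + half + r, k0 + kk);             // element and its depth neighbour
            *packed++ = at(m0 + half + r, k0 + kk + 1);         //   stored contiguously
          }
}

The vectorized gemm_pack_lhs below is the authoritative version; this sketch only spells out the resulting memory order.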
+ +template +struct gemm_pack_lhs { + EIGEN_DONT_INLINE void operator()(QInt16* blockA, const DataMapper& lhs, + Index depth, Index rows, Index stride = 0, + Index offset = 0); +}; + +template +EIGEN_DONT_INLINE void gemm_pack_lhs:: +operator()(QInt16* blockA, const DataMapper& lhs, Index depth, Index rows, + Index stride, Index offset) { + eigen_assert(stride == 0); + eigen_assert(offset == 0); + + // Use alternate function for weird sizes + if (rows % 16 != 0 || depth % 16 != 0) { + assert(false && + "only depths and rows that are a multiple of 16 are currently " + "supported"); + // gemm_pack_lhs_any lhs_pack; + // return lhs_pack(blockA, lhs, depth, rows, stride, offset); + } + + // Get vector pointer + __m256i* blockA_256 = reinterpret_cast<__m256i*>(blockA); + + // Pack rows in sets of 16 + for (Index m = 0; m < rows; m += 16) { + // Pack depth in sets of 4 + for (Index k = 0; k < depth; k += 4) { + // Load vectors + __m256i L_A = lhs.loadPacket(m, k); + __m256i L_B = lhs.loadPacket(m, k + 1); + __m256i L_C = lhs.loadPacket(m, k + 2); + __m256i L_D = lhs.loadPacket(m, k + 3); + + // Rearrange the inputs as required by the kernel + __m256i L_AB0_AB7 = _mm256_unpacklo_epi16(L_A, L_B); + __m256i L_AB8_AB15 = _mm256_unpackhi_epi16(L_A, L_B); + __m256i L_CD0_CD7 = _mm256_unpacklo_epi16(L_C, L_D); + __m256i L_CD8_CD15 = _mm256_unpackhi_epi16(L_C, L_D); + + __m256i L_AD0 = _mm256_permute2x128_si256(L_AB0_AB7, L_AB8_AB15, 0x20); + _mm256_store_si256(blockA_256++, L_AD0); + __m256i L_AD8 = _mm256_permute2x128_si256(L_CD0_CD7, L_CD8_CD15, 0x20); + _mm256_store_si256(blockA_256++, L_AD8); + __m256i L_AD16 = _mm256_permute2x128_si256(L_AB0_AB7, L_AB8_AB15, 0x31); + _mm256_store_si256(blockA_256++, L_AD16); + __m256i L_AD24 = _mm256_permute2x128_si256(L_CD0_CD7, L_CD8_CD15, 0x31); + _mm256_store_si256(blockA_256++, L_AD24); + } + } +} + +// Arrange a block of the right input matrix in contiguous memory. +// +// Given column major input (A0 beside A1 in memory): +// A0 B0 C0 D0 E0 F0 G0 H0 ... +// A1 B1 C1 D1 E1 F1 G1 H1 ... +// A2 B2 C2 D2 E2 F2 G2 H2 ... +// A3 B3 C3 D3 E3 F3 G3 H3 ... +// A4 B4 C4 D4 E4 F4 G4 H4 ... +// A5 B5 C5 D5 E5 F5 G5 H5 ... +// A6 B6 C6 D6 E6 F6 G6 H6 ... +// A7 B7 C7 D7 E7 F7 G7 H7 ... +// A8 ... +// ... +// Packing yields row major output (A0 beside A1 in memory): +// A0 A1 A2 A3 A4 A5 A6 A7 +// B0 B1 B2 B3 B4 B5 B6 B7 +// ... +// +// At least two elements of the same col are arranged contiguously because +// maddubs and madd both perform an adjacent addition in the kernel. We can +// save work by leaving 4 adjacent elements because kr = 4. +// The purpose is to collect n cols of size k. Two elements of the same +// col are arranged contiguously because madd performs an adjacent addition +// in the kernel. 
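Analogously, a hypothetical scalar model of the RHS layout for one 16-column by 16-depth tile (again a sketch with illustrative names, not part of the patch): each column contributes runs of four consecutive depth values, matching kr = 4, and the sixteen columns of the tile are cycled through for every such run.

#include <cstdint>

// Hypothetical scalar reference of the 16-bit RHS packing (column-major rhs,
// cols and depth assumed to be multiples of 16).
void pack_rhs_reference(int16_t* packed, const int16_t* rhs, long depth, long cols) {
  auto at = [&](long k, long n) { return rhs[k + n * depth]; };  // column-major access
  for (long n0 = 0; n0 < cols; n0 += 16)                         // tile of 16 columns (nr)
    for (long k0 = 0; k0 < depth; k0 += 16)                      // tile of 16 depth values
      for (long d = 0; d < 16; d += 4)                           // runs of kr = 4 depth values
        for (long c = 0; c < 16; ++c)                            // all 16 columns of the tile
          for (long dd = 0; dd < 4; ++dd)
            *packed++ = at(k0 + d + dd, n0 + c);
}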
+template +struct gemm_pack_rhs { + EIGEN_DONT_INLINE void operator()(QInt16* blockB, const DataMapper& rhs, + Index depth, Index cols, Index stride = 0, + Index offset = 0); +}; + +template +EIGEN_DONT_INLINE void +gemm_pack_rhs:: +operator()(QInt16* blockB, const DataMapper& rhs, Index depth, Index cols, + Index stride, Index offset) { + eigen_assert(stride == 0); + eigen_assert(offset == 0); + + // Use alternate function for weird sizes + if (cols % 16 != 0 || depth % 16 != 0) { + assert(false && + "only depths and cols that are a multiple of 16 are currently " + "supported"); + // gemm_pack_rhs_any rhs_pack; + // return rhs_pack(blockB, rhs, depth, cols, stride, offset); + } + + // Get vector pointer + __m256i* blockB_256 = reinterpret_cast<__m256i*>(blockB); + + // Perform a step of the packing for 4 columns + __m256i R_AB_L, R_AB_H, R_CD_L, R_CD_H, R_AD_0, R_AD_4, R_AD_8, R_AD_12; +#define PACK_STEP \ + R_AB_L = _mm256_unpacklo_epi64(R_A, R_B); \ + R_CD_L = _mm256_unpacklo_epi64(R_C, R_D); \ + R_AB_H = _mm256_unpackhi_epi64(R_A, R_B); \ + R_CD_H = _mm256_unpackhi_epi64(R_C, R_D); \ + R_AD_0 = _mm256_permute2x128_si256(R_AB_L, R_CD_L, 0x20); \ + R_AD_8 = _mm256_permute2x128_si256(R_AB_L, R_CD_L, 0x31); \ + R_AD_4 = _mm256_permute2x128_si256(R_AB_H, R_CD_H, 0x20); \ + R_AD_12 = _mm256_permute2x128_si256(R_AB_H, R_CD_H, 0x31); \ + _mm256_store_si256(blockB_256, R_AD_0); \ + _mm256_store_si256(blockB_256 + 4, R_AD_4); \ + _mm256_store_si256(blockB_256 + 8, R_AD_8); \ + _mm256_store_si256(blockB_256 + 12, R_AD_12); \ + blockB_256++; + + // Pack cols in sets of 16 + for (Index n = 0; n < cols; n += 16) { + // Pack depth in sets of 16 + for (Index k = 0; k < depth; k += 16) { + __m256i R_A = rhs.loadPacket(k, n); + __m256i R_B = rhs.loadPacket(k, n + 1); + __m256i R_C = rhs.loadPacket(k, n + 2); + __m256i R_D = rhs.loadPacket(k, n + 3); + PACK_STEP; + + R_A = rhs.loadPacket(k, n + 4); + R_B = rhs.loadPacket(k, n + 5); + R_C = rhs.loadPacket(k, n + 6); + R_D = rhs.loadPacket(k, n + 7); + PACK_STEP; + + R_A = rhs.loadPacket(k, n + 8); + R_B = rhs.loadPacket(k, n + 9); + R_C = rhs.loadPacket(k, n + 10); + R_D = rhs.loadPacket(k, n + 11); + PACK_STEP; + + R_A = rhs.loadPacket(k, n + 12); + R_B = rhs.loadPacket(k, n + 13); + R_C = rhs.loadPacket(k, n + 14); + R_D = rhs.loadPacket(k, n + 15); + PACK_STEP; + + blockB_256 += 12; + } + } +#undef PACK_STEP +} + +// Perform the actual multiplication on packed inputs +template +struct gebp_kernel { + typedef typename DataMapper::LinearMapper LinearMapper; + + EIGEN_DONT_INLINE + void operator()(const DataMapper& res, const QInt16* blockA, + const QInt16* blockB, Index rows, Index depth, Index cols, + QInt32 alpha, Index strideA = -1, Index strideB = -1, + Index offsetA = 0, Index offsetB = 0); +}; + +template +EIGEN_DONT_INLINE void gebp_kernel:: +operator()(const DataMapper& res, const QInt16* blockA, const QInt16* blockB, + Index rows, Index depth, Index cols, QInt32 alpha, Index strideA, + Index strideB, Index offsetA, Index offsetB) { + EIGEN_STATIC_ASSERT(!ConjugateLhs, YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT(!ConjugateRhs, YOU_MADE_A_PROGRAMMING_MISTAKE); + eigen_assert(alpha.value == 1); + eigen_assert(strideA == -1); + eigen_assert(strideB == -1); + eigen_assert(offsetA == 0); + eigen_assert(offsetB == 0); + eigen_assert(rows > 0); + eigen_assert(cols > 0); + eigen_assert(depth > 0); + eigen_assert(blockA); + eigen_assert(blockB); + + // Use alternate function for weird sizes + if (rows % 16 != 0 || cols % 16 != 0 || depth % 16 
!= 0) { + assert(false && + "only depths, cols and rows that are a multiple of 16 are currently " + "supported"); + // gebp_kernel_any gebp; + // return gebp(res, blockA, blockB, rows, depth, cols, alpha, strideA, + // strideB, offsetA, offsetB); + } + + // Create result block + QInt32* blockO = aligned_new(16 * 16); + memset(blockO, 0, 16 * 16 * sizeof(QInt32)); + + // Get vectorized pointers + __m256i* blockO_256 = reinterpret_cast<__m256i*>(blockO); + const __m256i* blockA_256 = reinterpret_cast(blockA); + const __m256i* blockB_256 = reinterpret_cast(blockB); + + // Loop over blocks of 16 columns + for (Index n = 0; n < cols; n += 16) { + // Reset index into blockA + Index indexL = 0; + // Loop over blocks of 16 rows + for (Index m = 0; m < rows; m += 16) { + // Reset index into blockB + Index indexR = n / 16 * depth; + // Loop over blocks of 4 on depth + for (Index k = 0; k < depth; k += 4) { + // Load inputs + __m256i L_AD0 = blockA_256[indexL++]; + __m256i L_AD8 = blockA_256[indexL++]; + __m256i L_EH0 = blockA_256[indexL++]; + __m256i L_EH8 = blockA_256[indexL++]; + + __m256i R_AH0 = blockB_256[indexR++]; + __m256i R_AH4 = blockB_256[indexR++]; + __m256i R_AH8 = blockB_256[indexR++]; + __m256i R_AH12 = blockB_256[indexR++]; + + // Declare variables used in COMPUTE_STEP + __m256i P_32_A, P_32_B, P_32; + +#define COMPUTE_STEP(R_INPUT_A, R_INPUT_B, OFFSET) \ + P_32_A = _mm256_madd_epi16(R_INPUT_A, L_AD0); \ + P_32_B = _mm256_madd_epi16(R_INPUT_B, L_AD8); \ + P_32 = _mm256_add_epi32(P_32_A, P_32_B); \ + _mm256_store_si256( \ + blockO_256 + 2 * OFFSET, \ + _mm256_add_epi32(_mm256_load_si256(blockO_256 + 2 * OFFSET), P_32)); \ + \ + P_32_A = _mm256_madd_epi16(R_INPUT_A, L_EH0); \ + P_32_B = _mm256_madd_epi16(R_INPUT_B, L_EH8); \ + P_32 = _mm256_add_epi32(P_32_A, P_32_B); \ + _mm256_store_si256( \ + blockO_256 + 2 * OFFSET + 1, \ + _mm256_add_epi32(_mm256_load_si256(blockO_256 + 2 * OFFSET + 1), P_32)); + + // Permute and shuffle to copy a single value across the entire vector + // Then compute the multiplication + // Replicate lower 128-bits of R_AH0 across both lanes + __m256i R_AH0_ = _mm256_permute2x128_si256(R_AH0, R_AH0, 0x00); + // Copy first two elements of R_AH0 across entire vector + __m256i R_AD0 = _mm256_shuffle_epi32(R_AH0_, 0x00); + // Copy second two elements of R_AH0 across entire vector + __m256i R_EH0 = _mm256_shuffle_epi32(R_AH0_, 0x55); + + COMPUTE_STEP(R_AD0, R_EH0, 0); + __m256i R_AD1 = _mm256_shuffle_epi32(R_AH0_, 0xAA); + __m256i R_EH1 = _mm256_shuffle_epi32(R_AH0_, 0xFF); + COMPUTE_STEP(R_AD1, R_EH1, 1); + + // Replicate upper 128-bits of R_AH0 across both lanes + R_AH0_ = _mm256_permute2x128_si256(R_AH0, R_AH0, 0x11); + __m256i R_AD2 = _mm256_shuffle_epi32(R_AH0_, 0x00); + __m256i R_EH2 = _mm256_shuffle_epi32(R_AH0_, 0x55); + COMPUTE_STEP(R_AD2, R_EH2, 2); + __m256i R_AD3 = _mm256_shuffle_epi32(R_AH0_, 0xAA); + __m256i R_EH3 = _mm256_shuffle_epi32(R_AH0_, 0xFF); + COMPUTE_STEP(R_AD3, R_EH3, 3); + + R_AH0_ = _mm256_permute2x128_si256(R_AH4, R_AH4, 0x00); + R_AD0 = _mm256_shuffle_epi32(R_AH0_, 0x00); + R_EH0 = _mm256_shuffle_epi32(R_AH0_, 0x55); + COMPUTE_STEP(R_AD0, R_EH0, 4); + R_AD1 = _mm256_shuffle_epi32(R_AH0_, 0xAA); + R_EH1 = _mm256_shuffle_epi32(R_AH0_, 0xFF); + COMPUTE_STEP(R_AD1, R_EH1, 5); + R_AH0_ = _mm256_permute2x128_si256(R_AH4, R_AH4, 0x11); + R_AD2 = _mm256_shuffle_epi32(R_AH0_, 0x00); + R_EH2 = _mm256_shuffle_epi32(R_AH0_, 0x55); + COMPUTE_STEP(R_AD2, R_EH2, 6); + R_AD3 = _mm256_shuffle_epi32(R_AH0_, 0xAA); + R_EH3 = _mm256_shuffle_epi32(R_AH0_, 
0xFF); + COMPUTE_STEP(R_AD3, R_EH3, 7); + + R_AH0_ = _mm256_permute2x128_si256(R_AH8, R_AH8, 0x00); + R_AD0 = _mm256_shuffle_epi32(R_AH0_, 0x00); + R_EH0 = _mm256_shuffle_epi32(R_AH0_, 0x55); + COMPUTE_STEP(R_AD0, R_EH0, 8); + R_AD1 = _mm256_shuffle_epi32(R_AH0_, 0xAA); + R_EH1 = _mm256_shuffle_epi32(R_AH0_, 0xFF); + COMPUTE_STEP(R_AD1, R_EH1, 9); + R_AH0_ = _mm256_permute2x128_si256(R_AH8, R_AH8, 0x11); + R_AD2 = _mm256_shuffle_epi32(R_AH0_, 0x00); + R_EH2 = _mm256_shuffle_epi32(R_AH0_, 0x55); + COMPUTE_STEP(R_AD2, R_EH2, 10); + R_AD3 = _mm256_shuffle_epi32(R_AH0_, 0xAA); + R_EH3 = _mm256_shuffle_epi32(R_AH0_, 0xFF); + COMPUTE_STEP(R_AD3, R_EH3, 11); + + R_AH0_ = _mm256_permute2x128_si256(R_AH12, R_AH12, 0x00); + R_AD0 = _mm256_shuffle_epi32(R_AH0_, 0x00); + R_EH0 = _mm256_shuffle_epi32(R_AH0_, 0x55); + COMPUTE_STEP(R_AD0, R_EH0, 12); + R_AD1 = _mm256_shuffle_epi32(R_AH0_, 0xAA); + R_EH1 = _mm256_shuffle_epi32(R_AH0_, 0xFF); + COMPUTE_STEP(R_AD1, R_EH1, 13); + R_AH0_ = _mm256_permute2x128_si256(R_AH12, R_AH12, 0x11); + R_AD2 = _mm256_shuffle_epi32(R_AH0_, 0x00); + R_EH2 = _mm256_shuffle_epi32(R_AH0_, 0x55); + COMPUTE_STEP(R_AD2, R_EH2, 14); + R_AD3 = _mm256_shuffle_epi32(R_AH0_, 0xAA); + R_EH3 = _mm256_shuffle_epi32(R_AH0_, 0xFF); + COMPUTE_STEP(R_AD3, R_EH3, 15); + +#undef COMPUTE_STEP + } + + // Transfer the results to the result matrix + Index i = 0; + for (Index j = n; j < n + 16; j++) { + LinearMapper r0 = res.getLinearMapper(m, j); + LinearMapper r1 = res.getLinearMapper(m + 8, j); + + r0.storePacket(0, _mm256_add_epi32(blockO_256[i++], r0.loadPacket(0))); + r1.storePacket(0, _mm256_add_epi32(blockO_256[i++], r1.loadPacket(0))); + } + + // Zero the result block so it can be reused + memset(blockO, 0, 16 * 16 * sizeof(QInt32)); + } + } + aligned_delete(blockO, 16 * 16); +} + +#endif + // AVX2 optimized implementation of Mat-Mat product. // LHS is encoded using signed 8-bit integers. // RHS is encoded using unsigned 8-bit integers. 
@@ -1751,4 +2227,4 @@ void gebp_kernel +struct general_matrix_vector_product { + EIGEN_DONT_INLINE static void run(Index rows, Index cols, + const LhsMapper& lhs, const RhsMapper& rhs, + QInt32* res, Index resIncr, QInt16 alpha); +}; + +template +EIGEN_DONT_INLINE void general_matrix_vector_product< + Index, QInt16, LhsMapper, ColMajor, ConjugateLhs, QInt16, RhsMapper, + ConjugateRhs, Version>::run(Index rows, Index cols, const LhsMapper& lhs, + const RhsMapper& rhs, QInt32* res, + Index resIncr, QInt16 alpha) { + eigen_assert(alpha.value == 1); + eigen_assert(resIncr == 1); + eigen_assert(rows > 0); + eigen_assert(cols > 0); + + for (Index i = 0; i < rows; ++i) { + for (Index j = 0; j < cols; ++j) { + res[i] += lhs(i, j) * rhs(j, 0); + } + } +} // Mat-Vec product // The lhs is encoded using 8bit signed integers, the rhs using 8bit unsigned integers @@ -118,6 +147,4 @@ EIGEN_DONT_INLINE void general_matrix_vector_product @@ -29,7 +28,6 @@ inline int _mm256_extract_epi8_N1(const __m256i X) return _mm_extract_epi8(_mm256_extractf128_si256((X), 1 >> 4), 1 % 16); } - namespace Eigen { namespace internal { @@ -502,4 +500,4 @@ struct functor_traits> { } // end namespace internal } // end namespace Eigen -#endif // EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX2_H_ +#endif // CXX11_SRC_FIXEDPOINT_PACKETMATHAVX2_H_ diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h index 8f9906dbf9..2092ce1d4c 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h @@ -1,5 +1,5 @@ -#ifndef EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_ -#define EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_ +#ifndef CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_ +#define CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_ #include "PacketMathAVX2.h" @@ -542,4 +542,4 @@ EIGEN_STRONG_INLINE QInt8 predux_max(const Packet64q8i& a) { } // end namespace internal } // end namespace Eigen -#endif // EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_ +#endif // CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_ diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h index 7b4ecc752f..9561d6a338 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h @@ -1,5 +1,5 @@ -#ifndef EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_ -#define EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_ +#ifndef CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_ +#define CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_ namespace Eigen { namespace internal { @@ -52,8 +52,16 @@ template <> EIGEN_STRONG_INLINE Packet32q8u pcast(const Packet8q32i& a, const Packet8q32i& b, const Packet8q32i& c, const Packet8q32i& d) { + // _mm256_packus_epi32 trims negative numbers to 0 but we can't allow numbers + // that are too large because _mm256_packus_epi16 expects signed input + // (example of problem input: 0x11111111, which saturates to 0xffff = -1, + // which saturates to 0). 
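The failure mode described in the comment above can be checked with a small scalar trace. The snippet below is only a model of the assumed pack semantics (saturate_u16 and saturate_u8 are illustrative stand-ins, not intrinsics): without the clip, a large positive input first saturates to 0xFFFF, is then read as the signed value -1 by the 16-bit pack, and ends up as 0; clipping to 255 up front keeps both steps exact.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Scalar stand-ins for the assumed saturation behaviour of the two pack steps.
uint16_t saturate_u16(int32_t v) { return (uint16_t)std::clamp<int32_t>(v, 0, 65535); }
uint8_t  saturate_u8(int16_t v)  { return (uint8_t)std::clamp<int>(v, 0, 255); }

int main() {
  const int32_t x = 0x11111111;
  // Without the clip: 0x11111111 -> 0xFFFF -> (reinterpreted as signed) -1 -> 0.
  assert(saturate_u8((int16_t)saturate_u16(x)) == 0);
  // With the clip to 255 applied first, the expected value survives both packs.
  assert(saturate_u8((int16_t)saturate_u16(std::min<int32_t>(x, 255))) == 255);
  return 0;
}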
+ const __m256i a_clip = _mm256_min_epi32(a, _mm256_set1_epi32(255)); + const __m256i b_clip = _mm256_min_epi32(b, _mm256_set1_epi32(255)); + const __m256i c_clip = _mm256_min_epi32(c, _mm256_set1_epi32(255)); + const __m256i d_clip = _mm256_min_epi32(d, _mm256_set1_epi32(255)); const __m256i converted = _mm256_packus_epi16( - _mm256_packs_epi32(a.val, b.val), _mm256_packs_epi32(c.val, d.val)); + _mm256_packus_epi32(a_clip, b_clip), _mm256_packus_epi32(c_clip, d_clip)); // Since packus does not cross 128 bit lane boundaries, // we have to permute to properly order the final result. const __m256i permute_mask = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0); @@ -63,4 +71,4 @@ pcast(const Packet8q32i& a, const Packet8q32i& b, } // end namespace internal } // end namespace Eigen -#endif // EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_ +#endif // CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_ diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h index 26735743d4..a09eac6707 100644 --- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h @@ -1,5 +1,5 @@ -#ifndef EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_ -#define EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_ +#ifndef CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_ +#define CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_ namespace Eigen { namespace internal { @@ -132,8 +132,15 @@ pcast(const Packet16q32i& a, const Packet16q32i& b, const Packet16q32i& c, const Packet16q32i& d) { - __m512i converted = _mm512_packs_epi16(_mm512_packs_epi32(a.val, b.val), - _mm512_packs_epi32(c.val, d.val)); + __m128i a_part = _mm512_cvtsepi32_epi8(a); + __m128i b_part = _mm512_cvtsepi32_epi8(b); + __m128i c_part = _mm512_cvtsepi32_epi8(c); + __m128i d_part = _mm512_cvtsepi32_epi8(d); + __m256i ab = + _mm256_inserti128_si256(_mm256_castsi128_si256(a_part), b_part, 1); + __m256i cd = + _mm256_inserti128_si256(_mm256_castsi128_si256(c_part), d_part, 1); + __m512i converted = _mm512_inserti64x4(_mm512_castsi256_si512(ab), cd, 1); return converted; } @@ -141,7 +148,10 @@ template <> EIGEN_STRONG_INLINE Packet32q16i pcast(const Packet16q32i& a, const Packet16q32i& b) { - __m512i converted = _mm512_packs_epi32(a.val, b.val); + __m256i a_part = _mm512_cvtsepi32_epi16(a); + __m256i b_part = _mm512_cvtsepi32_epi16(b); + __m512i converted = + _mm512_inserti64x4(_mm512_castsi256_si512(a_part), b_part, 1); return converted; } @@ -154,22 +164,45 @@ template <> EIGEN_STRONG_INLINE Packet64q8u pcast(const Packet16q32i& a, const Packet16q32i& b, const Packet16q32i& c, const Packet16q32i& d) { - const __m512i converted = _mm512_packus_epi16( - _mm512_packus_epi32(a.val, b.val), _mm512_packus_epi32(c.val, d.val)); + // Brute-force saturation since there isn't a pack operation for unsigned + // numbers that keeps the elements in order. 
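A scalar picture of what this brute-force path computes (a sketch with assumed names, not the vendored code): every 32-bit lane is clamped to [0, 255] and then narrowed in place, which preserves element order, whereas the packs/packus instructions interleave their two inputs per 128-bit lane.

#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical scalar equivalent of the clamp-then-narrow conversion below.
std::vector<uint8_t> saturate_narrow_u8(const std::vector<int32_t>& in) {
  std::vector<uint8_t> out(in.size());
  for (size_t i = 0; i < in.size(); ++i)
    out[i] = (uint8_t)std::clamp<int32_t>(in[i], 0, 255);  // max(min(x, 255), 0), order preserved
  return out;
}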
+ __m128i a_part = _mm512_cvtepi32_epi8(_mm512_max_epi32( + _mm512_min_epi32(a, _mm512_set1_epi32(255)), _mm512_setzero_si512())); + __m128i b_part = _mm512_cvtepi32_epi8(_mm512_max_epi32( + _mm512_min_epi32(b, _mm512_set1_epi32(255)), _mm512_setzero_si512())); + __m128i c_part = _mm512_cvtepi32_epi8(_mm512_max_epi32( + _mm512_min_epi32(c, _mm512_set1_epi32(255)), _mm512_setzero_si512())); + __m128i d_part = _mm512_cvtepi32_epi8(_mm512_max_epi32( + _mm512_min_epi32(d, _mm512_set1_epi32(255)), _mm512_setzero_si512())); + __m256i ab = + _mm256_inserti128_si256(_mm256_castsi128_si256(a_part), b_part, 1); + __m256i cd = + _mm256_inserti128_si256(_mm256_castsi128_si256(c_part), d_part, 1); + __m512i converted = _mm512_inserti64x4(_mm512_castsi256_si512(ab), cd, 1); return converted; } +#if 0 +// The type Packet32q16u does not exist for AVX-512 yet template <> struct type_casting_traits { enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; }; -#if 0 template <> EIGEN_STRONG_INLINE Packet32q16u pcast(const Packet16q32i& a, const Packet16q32i& b) { - const __m512i converted = _mm512_packus_epi32(a.val, b.val); + // Brute-force saturation since there isn't a pack operation for unsigned + // numbers that keeps the elements in order. + __m256i a_part = + _mm512_cvtepi32_epi16(_mm512_max_epi32( + _mm512_min_epi32(a, _mm512_set1_epi32(65535)), _mm512_setzero_si512())); + __m256i b_part = _mm512_cvtepi32_epi16( + _mm512_max_epi32(_mm512_min_epi32(b, _mm512_set1_epi32(65535)), + _mm512_setzero_si512())); + __m512i converted = + _mm512_inserti64x4(_mm512_castsi256_si512(a_part), b_part, 1); return converted; } #endif @@ -177,4 +210,4 @@ pcast(const Packet16q32i& a, } // end namespace internal } // end namespace Eigen -#endif // EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_ +#endif // CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_ -- cgit v1.2.3 From f017a9b9b400e65e7d616675beb46fea24f8bb2a Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 30 Jul 2018 14:52:10 -0700 Subject: Fix some whitespace and lint issues with llvm.bzl. PiperOrigin-RevId: 206651482 --- third_party/llvm/llvm.bzl | 236 ++++++++++++++++++++++++++++------------------ 1 file changed, 142 insertions(+), 94 deletions(-) (limited to 'third_party') diff --git a/third_party/llvm/llvm.bzl b/third_party/llvm/llvm.bzl index dfdacafceb..c5bebc9f2f 100644 --- a/third_party/llvm/llvm.bzl +++ b/third_party/llvm/llvm.bzl @@ -7,103 +7,143 @@ TODO(chandlerc): Currently this expresses include-based dependencies as correctly understood by the build system. """ +def _dict_add(*dictionaries): + """Returns a new `dict` that has all the entries of the given dictionaries. + + If the same key is present in more than one of the input dictionaries, the + last of them in the argument list overrides any earlier ones. + + This function is designed to take zero or one arguments as well as multiple + dictionaries, so that it follows arithmetic identities and callers can avoid + special cases for their inputs: the sum of zero dictionaries is the empty + dictionary, and the sum of a single dictionary is a copy of itself. + + Re-implemented here to avoid adding a dependency on skylib. + + Args: + *dictionaries: Zero or more dictionaries to be added. + + Returns: + A new `dict` that has all the entries of the given dictionaries. 
+ """ + result = {} + for d in dictionaries: + result.update(d) + return result + def gentbl(name, tblgen, td_file, td_srcs, tbl_outs, library = True, **kwargs): - """gentbl() generates tabular code from a table definition file. - - Args: - name: The name of the build rule for use in dependencies. - tblgen: The binary used to produce the output. - td_file: The primary table definitions file. - td_srcs: A list of table definition files included transitively. - tbl_outs: A list of tuples (opts, out), where each opts is a string of - options passed to tblgen, and the out is the corresponding output file - produced. - library: Whether to bundle the generated files into a library. - **kwargs: Keyword arguments to pass to subsidiary cc_library() rule. - """ - if td_file not in td_srcs: - td_srcs += [td_file] - includes = [] - for (opts, out) in tbl_outs: - outdir = out[:out.rindex("/")] - if outdir not in includes: - includes.append(outdir) - rule_suffix = "_".join(opts.replace("-", "_").replace("=", "_").split(" ")) - native.genrule( - name="%s_%s_genrule" % (name, rule_suffix), - srcs=td_srcs, - outs=[out], - tools=[tblgen], - message="Generating code from table: %s" % td_file, - cmd=(("$(location %s) " + "-I external/llvm/include " + - "-I external/llvm/tools/clang/include " + - "-I $$(dirname $(location %s)) " + "%s $(location %s) -o $@") % ( - tblgen, td_file, opts, td_file))) - # For now, all generated files can be assumed to comprise public interfaces. - # If this is not true, you should specify library = False - # and list the generated '.inc' files in "srcs". - if library: - native.cc_library(name=name, textual_hdrs=[f for (_, f) in tbl_outs], - includes=includes, **kwargs) + """gentbl() generates tabular code from a table definition file. + + Args: + name: The name of the build rule for use in dependencies. + tblgen: The binary used to produce the output. + td_file: The primary table definitions file. + td_srcs: A list of table definition files included transitively. + tbl_outs: A list of tuples (opts, out), where each opts is a string of + options passed to tblgen, and the out is the corresponding output file + produced. + library: Whether to bundle the generated files into a library. + **kwargs: Keyword arguments to pass to subsidiary cc_library() rule. + """ + if td_file not in td_srcs: + td_srcs += [td_file] + includes = [] + for (opts, out) in tbl_outs: + outdir = out[:out.rindex("/")] + if outdir not in includes: + includes.append(outdir) + rule_suffix = "_".join(opts.replace("-", "_").replace("=", "_").split(" ")) + native.genrule( + name = "%s_%s_genrule" % (name, rule_suffix), + srcs = td_srcs, + outs = [out], + tools = [tblgen], + message = "Generating code from table: %s" % td_file, + cmd = (("$(location %s) " + "-I external/llvm/include " + + "-I external/llvm/tools/clang/include " + + "-I $$(dirname $(location %s)) " + "%s $(location %s) -o $@") % ( + tblgen, + td_file, + opts, + td_file, + )), + ) + + # For now, all generated files can be assumed to comprise public interfaces. + # If this is not true, you should specify library = False + # and list the generated '.inc' files in "srcs". 
+ if library: + native.cc_library( + name = name, + textual_hdrs = [f for (_, f) in tbl_outs], + includes = includes, + **kwargs + ) def llvm_target_cmake_vars(native_arch, target_triple): - return { - "LLVM_HOST_TRIPLE": target_triple, - "LLVM_DEFAULT_TARGET_TRIPLE": target_triple, - "LLVM_NATIVE_ARCH": native_arch, - } + return { + "LLVM_HOST_TRIPLE": target_triple, + "LLVM_DEFAULT_TARGET_TRIPLE": target_triple, + "LLVM_NATIVE_ARCH": native_arch, + } def _quote(s): - """Quotes the given string for use in a shell command. - - This function double-quotes the given string (in case it contains spaces or - other special characters) and escapes any special characters (dollar signs, - double-quotes, and backslashes) that may be present. - - Args: - s: The string to quote. - Returns: - An escaped and quoted version of the string that can be passed to a shell - command. - """ - return ('"' + - s.replace("\\", "\\\\").replace("$", "\\$").replace('"', '\\"') + - '"') + """Quotes the given string for use in a shell command. + + This function double-quotes the given string (in case it contains spaces or + other special characters) and escapes any special characters (dollar signs, + double-quotes, and backslashes) that may be present. + + Args: + s: The string to quote. + + Returns: + An escaped and quoted version of the string that can be passed to a shell + command. + """ + return ('"' + + s.replace("\\", "\\\\").replace("$", "\\$").replace('"', '\\"') + + '"') def cmake_var_string(cmake_vars): - """Converts a dictionary to an input suitable for expand_cmake_vars. + """Converts a dictionary to an input suitable for expand_cmake_vars. - Ideally we would jist stringify in the expand_cmake_vars() rule, but select() - interacts badly with genrules. + Ideally we would jist stringify in the expand_cmake_vars() rule, but select() + interacts badly with genrules. - TODO(phawkins): replace the genrule() with native rule and delete this rule. + TODO(phawkins): replace the genrule() with native rule and delete this rule. - Args: - cmake_vars: a dictionary with string keys and values that are convertable to - strings. - """ - return " ".join([_quote("{}={}".format(k, str(v))) - for (k, v) in cmake_vars.items()]) + Args: + cmake_vars: a dictionary with string keys and values that are convertable to + strings. + + Returns: + cmake_vars in a form suitable for passing to expand_cmake_vars. + """ + return " ".join([ + _quote("{}={}".format(k, str(v))) + for (k, v) in cmake_vars.items() + ]) def expand_cmake_vars(name, src, dst, cmake_vars): - """Expands #cmakedefine, #cmakedefine01, and CMake variables in a text file. - - Args: - name: the name of the rule - src: the input of the rule - dst: the output of the rule - cmake_vars: a string containing the CMake variables, as generated by - cmake_var_string. - """ - expand_cmake_vars_tool = Label("@org_tensorflow//third_party/llvm:expand_cmake_vars") - native.genrule( - name = name, - srcs = [src], - tools = [expand_cmake_vars_tool], - outs = [dst], - cmd = ("$(location {}) ".format(expand_cmake_vars_tool) + cmake_vars + - "< $< > $@") - ) + """Expands #cmakedefine, #cmakedefine01, and CMake variables in a text file. + + Args: + name: the name of the rule + src: the input of the rule + dst: the output of the rule + cmake_vars: a string containing the CMake variables, as generated by + cmake_var_string. 
+ """ + expand_cmake_vars_tool = Label("@org_tensorflow//third_party/llvm:expand_cmake_vars") + native.genrule( + name = name, + srcs = [src], + tools = [expand_cmake_vars_tool], + outs = [dst], + cmd = ("$(location {}) ".format(expand_cmake_vars_tool) + cmake_vars + + "< $< > $@"), + ) # TODO(phawkins): the set of CMake variables was hardcoded for expediency. # However, we should really detect many of these via configure-time tests. @@ -212,18 +252,26 @@ darwin_cmake_vars = { # than hardcoding x86_64. llvm_all_cmake_vars = select({ "@org_tensorflow//tensorflow:darwin": cmake_var_string( - cmake_vars + llvm_target_cmake_vars("X86", "x86_64-apple-darwin") + - darwin_cmake_vars), + _dict_add( + cmake_vars, + llvm_target_cmake_vars("X86", "x86_64-apple-darwin"), + darwin_cmake_vars, + ), + ), "@org_tensorflow//tensorflow:linux_ppc64le": cmake_var_string( - cmake_vars + - llvm_target_cmake_vars("PowerPC", "powerpc64le-unknown-linux_gnu") + - linux_cmake_vars, + _dict_add( + cmake_vars, + llvm_target_cmake_vars("PowerPC", "powerpc64le-unknown-linux_gnu"), + linux_cmake_vars, + ), ), "//conditions:default": cmake_var_string( - cmake_vars + - llvm_target_cmake_vars("X86", "x86_64-unknown-linux_gnu") + - linux_cmake_vars), - + _dict_add( + cmake_vars, + llvm_target_cmake_vars("X86", "x86_64-unknown-linux_gnu"), + linux_cmake_vars, + ), + ), }) llvm_linkopts = ["-ldl", "-lm", "-lpthread"] -- cgit v1.2.3 From efddd17519e43e956c3fa79981c408803521fe61 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 30 Jul 2018 15:26:57 -0700 Subject: For llvm_support_platform_specific_srcs_glob use a select-of-glob instead of glob-of-select. This was suggested in https://github.com/tensorflow/tensorflow/commit/add96c8632df9596a73bf637d6b7015e9c7beaad#r29707275 PiperOrigin-RevId: 206657397 --- third_party/llvm/llvm.autogenerated.BUILD | 2 +- third_party/llvm/llvm.bzl | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'third_party') diff --git a/third_party/llvm/llvm.autogenerated.BUILD b/third_party/llvm/llvm.autogenerated.BUILD index c3b9ec4c25..0ac27e26a4 100644 --- a/third_party/llvm/llvm.autogenerated.BUILD +++ b/third_party/llvm/llvm.autogenerated.BUILD @@ -1942,7 +1942,7 @@ cc_library( "include/llvm/BinaryFormat/COFF.h", "include/llvm/BinaryFormat/MachO.h", "lib/Support/*.h", - ] + llvm_support_platform_specific_srcs_glob), + ]) + llvm_support_platform_specific_srcs_glob(), hdrs = glob([ "include/llvm/Support/*.h", "include/llvm/Support/*.def", diff --git a/third_party/llvm/llvm.bzl b/third_party/llvm/llvm.bzl index c5bebc9f2f..d493a3c476 100644 --- a/third_party/llvm/llvm.bzl +++ b/third_party/llvm/llvm.bzl @@ -289,7 +289,10 @@ llvm_copts = [] # Platform specific sources for libSupport. -llvm_support_platform_specific_srcs_glob = [ - "lib/Support/Unix/*.inc", - "lib/Support/Unix/*.h", -] +def llvm_support_platform_specific_srcs_glob(): + return select({ + "//conditions:default": native.glob([ + "lib/Support/Unix/*.inc", + "lib/Support/Unix/*.h", + ]), + }) -- cgit v1.2.3