author    avijit-nervana <avijit.chakraborty@intel.com>  2018-07-31 23:04:56 -0700
committer avijit-nervana <avijit.chakraborty@intel.com>  2018-07-31 23:04:56 -0700
commit 2f8b328d2571625161b3c0da0a9b25b907a0927e (patch)
tree   d264a0d5dbfcd774cccd0f2c23d7616897c1b894 /third_party
parent 2f3e97cf0ee50ee0e55ab1a3795cc82537426e8c (diff)
parent 7ca6ee15555db77c09861fc7e84e5181001da07d (diff)
Merge remote-tracking branch 'upstream/master' and changes
based on PR review comments.
Diffstat (limited to 'third_party')
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/Core | 46
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks | 35
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h | 6
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProduct.h | 86
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h | 482
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h | 9
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatVecProduct.h | 39
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h | 8
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h | 6
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h | 16
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h | 53
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Activations.h | 116
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Attention.h | 209
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardCuboidConvolutions.h | 523
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardSpatialConvolutions.h | 351
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/CuboidConvolution.h | 179
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Patch3d.h | 240
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Pooling.h | 433
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/SoftMax.h | 83
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/SpatialConvolutions.h | 775
-rw-r--r-- third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/TensorConvolutionByFFT.h | 289
-rw-r--r-- third_party/llvm/llvm.autogenerated.BUILD | 2
-rw-r--r-- third_party/llvm/llvm.bzl | 247
-rw-r--r-- third_party/ngraph/build_defs.bzl | 10
-rw-r--r-- third_party/ngraph/ngraph_tf.BUILD | 3
-rw-r--r-- third_party/toolchains/BUILD | 2
26 files changed, 815 insertions, 3433 deletions
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/Core b/third_party/eigen3/unsupported/Eigen/CXX11/Core
deleted file mode 100644
index 1b3690716c..0000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/Core
+++ /dev/null
@@ -1,46 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2013 Christian Seiler <christian@iwakd.de>
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_CORE_MODULE
-#define EIGEN_CXX11_CORE_MODULE
-
-#include <Eigen/Core>
-
-#include <Eigen/src/Core/util/DisableStupidWarnings.h>
-
-/** \defgroup CXX11_Core_Module C++11 Core Module
- *
- * This module provides common core features for all modules that
- * explicitly depend on C++11. Currently, this is only the Tensor
- * module. Note that at this stage, you should not need to include
- * this module directly.
- *
- * It also provides a limited fallback for compilers that don't support
- * CXX11 yet, such as nvcc.
- *
- * \code
- * #include <Eigen/CXX11/Core>
- * \endcode
- */
-
-// Only a subset of cxx11 is allowed at Google, so we default to emulate the
-// cxx11 functionality that we need.
-#include "src/Core/util/FixedSizeVector.h"
-#if 1
-#include <vector>
-#include "src/Core/util/EmulateCXX11Meta.h"
-#else
-#include "src/Core/util/CXX11Workarounds.h"
-#include "src/Core/util/CXX11Meta.h"
-#endif
-#include <Eigen/src/Core/util/ReenableStupidWarnings.h>
-
-#endif // EIGEN_CXX11_CORE_MODULE
-
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks b/third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks
deleted file mode 100644
index 7741b68d8a..0000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/NeuralNetworks
+++ /dev/null
@@ -1,35 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_NEURAL_NETWORKS_MODULE
-#define EIGEN_CXX11_NEURAL_NETWORKS_MODULE
-
-#include "unsupported/Eigen/CXX11/Tensor"
-
-/** \defgroup CXX11_NeuralNetworks_Module Neural Networks Module
- *
- * This module provides an efficient implementation of the common primitives
- * used by neural networks.
- * The primitives are built on top of the tensor library.
- *
- * \code
- * #include <Eigen/CXX11/NeuralNetworks>
- * \endcode
- */
-
-#include "unsupported/Eigen/CXX11/src/NeuralNetworks/Activations.h"
-#include "unsupported/Eigen/CXX11/src/NeuralNetworks/Attention.h"
-#include "unsupported/Eigen/CXX11/src/NeuralNetworks/Pooling.h"
-#include "unsupported/Eigen/CXX11/src/NeuralNetworks/SoftMax.h"
-#include "unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardCuboidConvolutions.h"
-#include "unsupported/Eigen/CXX11/src/NeuralNetworks/CuboidConvolution.h"
-#include "unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardSpatialConvolutions.h"
-#include "unsupported/Eigen/CXX11/src/NeuralNetworks/SpatialConvolutions.h"
-
-#endif // EIGEN_CXX11_NEURAL_NETWORKS_MODULE
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h
index 6b625abc3e..5ab3664918 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/FixedPointTypes.h
@@ -7,8 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef EIGEN_CXX11_FIXED_POINT_TYPES_H
-#define EIGEN_CXX11_FIXED_POINT_TYPES_H
+#ifndef CXX11_SRC_FIXEDPOINT_FIXEDPOINTTYPES_H_
+#define CXX11_SRC_FIXEDPOINT_FIXEDPOINTTYPES_H_
#include <cmath>
#include <iostream>
@@ -339,4 +339,4 @@ EIGEN_STRONG_INLINE std::ostream& operator<<(std::ostream& os, QInt32 a) {
} // namespace Eigen
-#endif // EIGEN_CXX11_FIXED_POINT_TYPES_H
+#endif // CXX11_SRC_FIXEDPOINT_FIXEDPOINTTYPES_H_
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProduct.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProduct.h
index 4d0dca07df..e6f4080ae1 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProduct.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProduct.h
@@ -7,9 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef EIGEN_CXX11_FIXED_POINT_MAT_MAT_PRODUCT_H
-#define EIGEN_CXX11_FIXED_POINT_MAT_MAT_PRODUCT_H
-
+#ifndef CXX11_SRC_FIXEDPOINT_MATMATPRODUCT_H_
+#define CXX11_SRC_FIXEDPOINT_MATMATPRODUCT_H_
namespace Eigen {
namespace internal {
@@ -24,6 +23,14 @@ template<> struct scalar_product_traits<QInt8, QInt8>
typedef QInt32 ReturnType;
};
+// Accumulate the product of 2 QInt16 inputs on 32 bits to prevent
+// overflows
+template <>
+struct scalar_product_traits<QInt16, QInt16> {
+ enum { Defined = 1 };
+ typedef QInt32 ReturnType;
+};
+
// Accumulate the product of QInt8 inputs with QUint8 inputs on 32 bits
// to prevent overflows
template<> struct scalar_product_traits<QInt8, QUInt8>
@@ -247,9 +254,76 @@ void gebp_kernel<QUInt8, QInt8, Index, DataMapper, mr, nr, ConjugateLhs, Conjuga
}
#endif
-} // namespace internal
-} // namespace Eigen
+#ifndef EIGEN_USE_OPTIMIZED_INT16_INT16_MAT_MAT_PRODUCT
+
+template <bool _ConjLhs, bool _ConjRhs>
+class gebp_traits<QInt16, QInt16, _ConjLhs, _ConjRhs> {
+ public:
+ typedef QInt16 LhsScalar;
+ typedef QInt16 RhsScalar;
+ typedef QInt32 ResScalar;
+
+ enum {
+ // register block size along the M and N directions
+ // One for the current implementation
+ nr = 1,
+ mr = 1,
+ // Progress made at each iteration of the product loop
+ // also 1 for the current implementation
+ LhsProgress = 1,
+ RhsProgress = 1
+ };
+};
+
+// The signed 16bit Mat-Mat product itself.
+template <typename Index, typename DataMapper, int mr, int nr,
+ bool ConjugateLhs, bool ConjugateRhs>
+struct gebp_kernel<QInt16, QInt16, Index, DataMapper, mr, nr, ConjugateLhs,
+ ConjugateRhs> {
+ EIGEN_DONT_INLINE
+ void operator()(const DataMapper& res, const QInt16* blockA,
+ const QInt16* blockB, Index rows, Index depth, Index cols,
+ QInt32 alpha, Index strideA = -1, Index strideB = -1,
+ Index offsetA = 0, Index offsetB = 0);
+};
+
+template <typename Index, typename DataMapper, int mr, int nr,
+ bool ConjugateLhs, bool ConjugateRhs>
+EIGEN_DONT_INLINE void gebp_kernel<QInt16, QInt16, Index, DataMapper, mr, nr,
+ ConjugateLhs, ConjugateRhs>::
+operator()(const DataMapper& res, const QInt16* blockA, const QInt16* blockB,
+ Index rows, Index depth, Index cols, QInt32 alpha, Index strideA,
+ Index strideB, Index offsetA, Index offsetB) {
+ EIGEN_STATIC_ASSERT(!ConjugateLhs, YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT(!ConjugateRhs, YOU_MADE_A_PROGRAMMING_MISTAKE);
+ eigen_assert(alpha.value == 1);
+ eigen_assert(strideA == -1);
+ eigen_assert(strideB == -1);
+ eigen_assert(offsetA == 0);
+ eigen_assert(offsetB == 0);
+
+ eigen_assert(rows > 0);
+ eigen_assert(cols > 0);
+ eigen_assert(depth > 0);
+ eigen_assert(blockA);
+ eigen_assert(blockB);
+
+ for (Index j = 0; j < cols; ++j) {
+ Index startB = j * depth;
+ for (Index i = 0; i < rows; ++i) {
+ Index startA = i * depth;
+
+ for (Index k = 0; k < depth; ++k) {
+ res(i, j) += blockA[startA + k] * blockB[startB + k];
+ }
+ }
+ }
+}
+#endif
+
+} // namespace internal
+} // namespace Eigen
-#endif // EIGEN_CXX11_FIXED_POINT_MAT_MAT_PRODUCT_H
+#endif // CXX11_SRC_FIXEDPOINT_MATMATPRODUCT_H_
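
For context on why both the traits specialization and the reference kernel above accumulate in QInt32: a minimal standalone sketch in plain C++, using built-in integer types as stand-ins for the QInt16/QInt32 wrappers (the sample values are made up for illustration).

#include <cassert>
#include <cstdint>

int main() {
  // 300 * 300 = 90000 overflows int16_t (max 32767) but fits in int32_t,
  // which is why scalar_product_traits<QInt16, QInt16> picks QInt32 as the
  // ReturnType.
  const int16_t a = 300, b = 300;
  const int32_t product = static_cast<int32_t>(a) * static_cast<int32_t>(b);
  assert(product == 90000);

  // The reference gebp_kernel accumulates `depth` such products per output
  // coefficient, so the accumulator must be at least 32 bits wide too.
  const int16_t lhs[4] = {300, -300, 300, -300};
  const int16_t rhs[4] = {300, 300, 300, 300};
  int32_t acc = 0;
  for (int k = 0; k < 4; ++k)
    acc += static_cast<int32_t>(lhs[k]) * static_cast<int32_t>(rhs[k]);
  assert(acc == 0);  // 90000 - 90000 + 90000 - 90000
  return 0;
}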
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h
index 6b4b0edcfb..66532fb600 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductAVX2.h
@@ -3,18 +3,494 @@
//
// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
// Copyright (C) 2015 Matthew Sarett <msarett@google.com>
+// Copyright (C) 2016 Nishant Patil <nishantpatil@google.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef EIGEN_CXX11_FIXED_POINT_MAT_MAT_PRODUCT_AVX2_H
-#define EIGEN_CXX11_FIXED_POINT_MAT_MAT_PRODUCT_AVX2_H
+#ifndef CXX11_SRC_FIXEDPOINT_MATMATPRODUCTAVX2_H_
+#define CXX11_SRC_FIXEDPOINT_MATMATPRODUCTAVX2_H_
namespace Eigen {
namespace internal {
// AVX2 optimized implementation of Mat-Mat product.
+// LHS is encoded using signed 16-bit integers.
+// RHS is encoded using signed 16-bit integers.
+#ifdef EIGEN_USE_OPTIMIZED_INT16_INT16_MAT_MAT_PRODUCT
+
+// Define quantized traits
+template <bool _ConjLhs, bool _ConjRhs>
+class gebp_traits<QInt16, QInt16, _ConjLhs, _ConjRhs> {
+ public:
+ typedef QInt16 LhsScalar;
+ typedef QInt16 RhsScalar;
+ typedef QInt32 ResScalar;
+
+ enum {
+ // Define register blocking scheme.
+ nr = 16,
+ mr = 16,
+ kr = 4,
+ // Ignore progress tracking per loop iteration.
+ LhsProgress = -1,
+ RhsProgress = -1
+ };
+};
+
+// Specialized blocking for quantized implementations.
+// Used by TensorContractionThreadPool; inputs must have dimensions that are
+// multiples of 16 (the constructor rounds each blocking size up accordingly).
+template <typename Index, int ShardingType>
+class TensorContractionBlocking<QInt16, QInt16, Index, ShardingType> {
+ public:
+ TensorContractionBlocking(Index k, Index m, Index n, Index num_threads = 1)
+ : kc_(((k + 15) / 16) * 16),
+ mc_(((m + 15) / 16) * 16),
+ nc_(((n + 15) / 16) * 16) {
+ eigen_assert(mc_ % 16 == 0);
+ eigen_assert(kc_ % 16 == 0);
+ if (!k || !m || !n) {
+ return;
+ }
+
+ if (ShardingType == ShardByCol) {
+ eigen_assert(nc_ % 16 == 0);
+ nc_ = (((nc_ / num_threads) + 15) / 16) * 16;
+ } else {
+ eigen_assert(nc_ % 16 == 0);
+ mc_ = (((mc_ / num_threads) + 15) / 16) * 16;
+ }
+ }
+
+ EIGEN_ALWAYS_INLINE Index kc() const { return kc_; }
+ EIGEN_ALWAYS_INLINE Index mc() const { return mc_; }
+ EIGEN_ALWAYS_INLINE Index nc() const { return nc_; }
+
+ private:
+ Index kc_;
+ Index mc_;
+ Index nc_;
+};
+
+// Specialized blocking for quantized implementations.
+// Used by TensorContraction and GeneralMatrixMatrix, inputs are padded to
+// multiples of 16.
+template <int MaxRows, int MaxCols, int MaxDepth, int KcFactor>
+class gemm_blocking_space<ColMajor, QInt16, QInt16, MaxRows, MaxCols, MaxDepth,
+ KcFactor, false>
+ : public level3_blocking<QInt16, QInt16> {
+ DenseIndex m_sizeA;
+ DenseIndex m_sizeB;
+
+ public:
+ gemm_blocking_space(DenseIndex rows, DenseIndex cols, DenseIndex depth,
+ DenseIndex /*num_threads*/, bool /*l3_blocking*/) {
+ this->m_mc = ((rows + 15) / 16) * 16;
+ this->m_nc = ((cols + 15) / 16) * 16;
+ this->m_kc = ((depth + 15) / 16) * 16;
+ m_sizeA = this->m_mc * this->m_kc;
+ m_sizeB = this->m_kc * this->m_nc;
+ }
+ void allocateA() {
+ if (this->m_blockA == 0) this->m_blockA = aligned_new<QInt16>(m_sizeA);
+ }
+ void allocateB() {
+ if (this->m_blockB == 0) this->m_blockB = aligned_new<QInt16>(m_sizeB);
+ }
+ void allocateAll() {
+ allocateA();
+ allocateB();
+ }
+ ~gemm_blocking_space() {
+ aligned_delete(this->m_blockA, m_sizeA);
+ aligned_delete(this->m_blockB, m_sizeB);
+ }
+};
+
+// Below are the fully optimized versions that are correct only for sizes that
+// are multiple of 16. It is about a 10% performance benefit to keep these
+// implementations separate.
+
+// Arrange a block of the left input matrix in contiguous memory.
+//
+// Given column major input (A0 beside A1 in memory):
+// A0 B0 C0 D0 E0 F0 G0 H0 ...
+// A1 B1 C1 D1 E1 F1 G1 H1 ...
+// A2 B2 C2 D2 E2 F2 G2 H2 ...
+// A3 B3 C3 D3 E3 F3 G3 H3 ...
+// A4 B4 C4 D4 E4 F4 G4 H4 ...
+// A5 B5 C5 D5 E5 F5 G5 H5 ...
+// A6 B6 C6 D6 E6 F6 G6 H6 ...
+// A7 B7 C7 D7 E7 F7 G7 H7 ...
+// A8 ...
+// ...
+//
+// Packing with m = 8 yields row major output (A0 beside B0 in memory):
+// A0 B0
+// A1 B1
+// A2 B2
+// A3 B3
+// A4 B4
+// A5 B5
+// A6 B6
+// A7 B7
+// ...
+//
+// The purpose is to collect m rows of size k. Two elements of the same
+// row are arranged contiguously because madd performs an adjacent addition
+// in the kernel.
+
+template <typename Index, typename DataMapper, int Pack1, int Pack2,
+ bool Conjugate, bool PanelMode>
+struct gemm_pack_lhs<QInt16, Index, DataMapper, Pack1, Pack2, ColMajor,
+ Conjugate, PanelMode> {
+ EIGEN_DONT_INLINE void operator()(QInt16* blockA, const DataMapper& lhs,
+ Index depth, Index rows, Index stride = 0,
+ Index offset = 0);
+};
+
+template <typename Index, typename DataMapper, int Pack1, int Pack2,
+ bool Conjugate, bool PanelMode>
+EIGEN_DONT_INLINE void gemm_pack_lhs<QInt16, Index, DataMapper, Pack1, Pack2,
+ ColMajor, Conjugate, PanelMode>::
+operator()(QInt16* blockA, const DataMapper& lhs, Index depth, Index rows,
+ Index stride, Index offset) {
+ eigen_assert(stride == 0);
+ eigen_assert(offset == 0);
+
+ // Use alternate function for weird sizes
+ if (rows % 16 != 0 || depth % 16 != 0) {
+ assert(false &&
+ "only depths and rows that are a multiple of 16 are currently "
+ "supported");
+ // gemm_pack_lhs_any<QInt16, Index, DataMapper, Pack1, Pack2, ColMajor,
+ // Conjugate, PanelMode> lhs_pack;
+ // return lhs_pack(blockA, lhs, depth, rows, stride, offset);
+ }
+
+ // Get vector pointer
+ __m256i* blockA_256 = reinterpret_cast<__m256i*>(blockA);
+
+ // Pack rows in sets of 16
+ for (Index m = 0; m < rows; m += 16) {
+ // Pack depth in sets of 4
+ for (Index k = 0; k < depth; k += 4) {
+ // Load vectors
+ __m256i L_A = lhs.loadPacket(m, k);
+ __m256i L_B = lhs.loadPacket(m, k + 1);
+ __m256i L_C = lhs.loadPacket(m, k + 2);
+ __m256i L_D = lhs.loadPacket(m, k + 3);
+
+ // Rearrange the inputs as required by the kernel
+ __m256i L_AB0_AB7 = _mm256_unpacklo_epi16(L_A, L_B);
+ __m256i L_AB8_AB15 = _mm256_unpackhi_epi16(L_A, L_B);
+ __m256i L_CD0_CD7 = _mm256_unpacklo_epi16(L_C, L_D);
+ __m256i L_CD8_CD15 = _mm256_unpackhi_epi16(L_C, L_D);
+
+ __m256i L_AD0 = _mm256_permute2x128_si256(L_AB0_AB7, L_AB8_AB15, 0x20);
+ _mm256_store_si256(blockA_256++, L_AD0);
+ __m256i L_AD8 = _mm256_permute2x128_si256(L_CD0_CD7, L_CD8_CD15, 0x20);
+ _mm256_store_si256(blockA_256++, L_AD8);
+ __m256i L_AD16 = _mm256_permute2x128_si256(L_AB0_AB7, L_AB8_AB15, 0x31);
+ _mm256_store_si256(blockA_256++, L_AD16);
+ __m256i L_AD24 = _mm256_permute2x128_si256(L_CD0_CD7, L_CD8_CD15, 0x31);
+ _mm256_store_si256(blockA_256++, L_AD24);
+ }
+ }
+}
+
+// Arrange a block of the right input matrix in contiguous memory.
+//
+// Given column major input (A0 beside A1 in memory):
+// A0 B0 C0 D0 E0 F0 G0 H0 ...
+// A1 B1 C1 D1 E1 F1 G1 H1 ...
+// A2 B2 C2 D2 E2 F2 G2 H2 ...
+// A3 B3 C3 D3 E3 F3 G3 H3 ...
+// A4 B4 C4 D4 E4 F4 G4 H4 ...
+// A5 B5 C5 D5 E5 F5 G5 H5 ...
+// A6 B6 C6 D6 E6 F6 G6 H6 ...
+// A7 B7 C7 D7 E7 F7 G7 H7 ...
+// A8 ...
+// ...
+// Packing yields row major output (A0 beside A1 in memory):
+// A0 A1 A2 A3 A4 A5 A6 A7
+// B0 B1 B2 B3 B4 B5 B6 B7
+// ...
+//
+// The purpose is to collect n cols of size k. At least two elements of the
+// same col are arranged contiguously because maddubs and madd both perform
+// an adjacent addition in the kernel. We can save work by leaving 4 adjacent
+// elements because kr = 4.
+template <typename Index, typename DataMapper, int nr, bool Conjugate,
+ bool PanelMode>
+struct gemm_pack_rhs<QInt16, Index, DataMapper, nr, ColMajor, Conjugate,
+ PanelMode> {
+ EIGEN_DONT_INLINE void operator()(QInt16* blockB, const DataMapper& rhs,
+ Index depth, Index cols, Index stride = 0,
+ Index offset = 0);
+};
+
+template <typename Index, typename DataMapper, int nr, bool Conjugate,
+ bool PanelMode>
+EIGEN_DONT_INLINE void
+gemm_pack_rhs<QInt16, Index, DataMapper, nr, ColMajor, Conjugate, PanelMode>::
+operator()(QInt16* blockB, const DataMapper& rhs, Index depth, Index cols,
+ Index stride, Index offset) {
+ eigen_assert(stride == 0);
+ eigen_assert(offset == 0);
+
+ // Use alternate function for weird sizes
+ if (cols % 16 != 0 || depth % 16 != 0) {
+ assert(false &&
+ "only depths and cols that are a multiple of 16 are currently "
+ "supported");
+ // gemm_pack_rhs_any<QInt16, Index, DataMapper, nr, ColMajor, Conjugate,
+ // PanelMode> rhs_pack;
+ // return rhs_pack(blockB, rhs, depth, cols, stride, offset);
+ }
+
+ // Get vector pointer
+ __m256i* blockB_256 = reinterpret_cast<__m256i*>(blockB);
+
+ // Perform a step of the packing for 4 columns
+ __m256i R_AB_L, R_AB_H, R_CD_L, R_CD_H, R_AD_0, R_AD_4, R_AD_8, R_AD_12;
+#define PACK_STEP \
+ R_AB_L = _mm256_unpacklo_epi64(R_A, R_B); \
+ R_CD_L = _mm256_unpacklo_epi64(R_C, R_D); \
+ R_AB_H = _mm256_unpackhi_epi64(R_A, R_B); \
+ R_CD_H = _mm256_unpackhi_epi64(R_C, R_D); \
+ R_AD_0 = _mm256_permute2x128_si256(R_AB_L, R_CD_L, 0x20); \
+ R_AD_8 = _mm256_permute2x128_si256(R_AB_L, R_CD_L, 0x31); \
+ R_AD_4 = _mm256_permute2x128_si256(R_AB_H, R_CD_H, 0x20); \
+ R_AD_12 = _mm256_permute2x128_si256(R_AB_H, R_CD_H, 0x31); \
+ _mm256_store_si256(blockB_256, R_AD_0); \
+ _mm256_store_si256(blockB_256 + 4, R_AD_4); \
+ _mm256_store_si256(blockB_256 + 8, R_AD_8); \
+ _mm256_store_si256(blockB_256 + 12, R_AD_12); \
+ blockB_256++;
+
+ // Pack cols in sets of 16
+ for (Index n = 0; n < cols; n += 16) {
+ // Pack depth in sets of 16
+ for (Index k = 0; k < depth; k += 16) {
+ __m256i R_A = rhs.loadPacket(k, n);
+ __m256i R_B = rhs.loadPacket(k, n + 1);
+ __m256i R_C = rhs.loadPacket(k, n + 2);
+ __m256i R_D = rhs.loadPacket(k, n + 3);
+ PACK_STEP;
+
+ R_A = rhs.loadPacket(k, n + 4);
+ R_B = rhs.loadPacket(k, n + 5);
+ R_C = rhs.loadPacket(k, n + 6);
+ R_D = rhs.loadPacket(k, n + 7);
+ PACK_STEP;
+
+ R_A = rhs.loadPacket(k, n + 8);
+ R_B = rhs.loadPacket(k, n + 9);
+ R_C = rhs.loadPacket(k, n + 10);
+ R_D = rhs.loadPacket(k, n + 11);
+ PACK_STEP;
+
+ R_A = rhs.loadPacket(k, n + 12);
+ R_B = rhs.loadPacket(k, n + 13);
+ R_C = rhs.loadPacket(k, n + 14);
+ R_D = rhs.loadPacket(k, n + 15);
+ PACK_STEP;
+
+ blockB_256 += 12;
+ }
+ }
+#undef PACK_STEP
+}
+
+// Perform the actual multiplication on packed inputs
+template <typename Index, typename DataMapper, int mr, int nr,
+ bool ConjugateLhs, bool ConjugateRhs>
+struct gebp_kernel<QInt16, QInt16, Index, DataMapper, mr, nr, ConjugateLhs,
+ ConjugateRhs> {
+ typedef typename DataMapper::LinearMapper LinearMapper;
+
+ EIGEN_DONT_INLINE
+ void operator()(const DataMapper& res, const QInt16* blockA,
+ const QInt16* blockB, Index rows, Index depth, Index cols,
+ QInt32 alpha, Index strideA = -1, Index strideB = -1,
+ Index offsetA = 0, Index offsetB = 0);
+};
+
+template <typename Index, typename DataMapper, int mr, int nr,
+ bool ConjugateLhs, bool ConjugateRhs>
+EIGEN_DONT_INLINE void gebp_kernel<QInt16, QInt16, Index, DataMapper, mr, nr,
+ ConjugateLhs, ConjugateRhs>::
+operator()(const DataMapper& res, const QInt16* blockA, const QInt16* blockB,
+ Index rows, Index depth, Index cols, QInt32 alpha, Index strideA,
+ Index strideB, Index offsetA, Index offsetB) {
+ EIGEN_STATIC_ASSERT(!ConjugateLhs, YOU_MADE_A_PROGRAMMING_MISTAKE);
+ EIGEN_STATIC_ASSERT(!ConjugateRhs, YOU_MADE_A_PROGRAMMING_MISTAKE);
+ eigen_assert(alpha.value == 1);
+ eigen_assert(strideA == -1);
+ eigen_assert(strideB == -1);
+ eigen_assert(offsetA == 0);
+ eigen_assert(offsetB == 0);
+ eigen_assert(rows > 0);
+ eigen_assert(cols > 0);
+ eigen_assert(depth > 0);
+ eigen_assert(blockA);
+ eigen_assert(blockB);
+
+ // Use alternate function for weird sizes
+ if (rows % 16 != 0 || cols % 16 != 0 || depth % 16 != 0) {
+ assert(false &&
+ "only depths, cols and rows that are a multiple of 16 are currently "
+ "supported");
+ // gebp_kernel_any<QInt16, QInt16, Index, DataMapper, mr, nr, ConjugateLhs,
+ // ConjugateRhs> gebp;
+ // return gebp(res, blockA, blockB, rows, depth, cols, alpha, strideA,
+ // strideB, offsetA, offsetB);
+ }
+
+ // Create result block
+ QInt32* blockO = aligned_new<QInt32>(16 * 16);
+ memset(blockO, 0, 16 * 16 * sizeof(QInt32));
+
+ // Get vectorized pointers
+ __m256i* blockO_256 = reinterpret_cast<__m256i*>(blockO);
+ const __m256i* blockA_256 = reinterpret_cast<const __m256i*>(blockA);
+ const __m256i* blockB_256 = reinterpret_cast<const __m256i*>(blockB);
+
+ // Loop over blocks of 16 columns
+ for (Index n = 0; n < cols; n += 16) {
+ // Reset index into blockA
+ Index indexL = 0;
+ // Loop over blocks of 16 rows
+ for (Index m = 0; m < rows; m += 16) {
+ // Reset index into blockB
+ Index indexR = n / 16 * depth;
+ // Loop over blocks of 4 on depth
+ for (Index k = 0; k < depth; k += 4) {
+ // Load inputs
+ __m256i L_AD0 = blockA_256[indexL++];
+ __m256i L_AD8 = blockA_256[indexL++];
+ __m256i L_EH0 = blockA_256[indexL++];
+ __m256i L_EH8 = blockA_256[indexL++];
+
+ __m256i R_AH0 = blockB_256[indexR++];
+ __m256i R_AH4 = blockB_256[indexR++];
+ __m256i R_AH8 = blockB_256[indexR++];
+ __m256i R_AH12 = blockB_256[indexR++];
+
+ // Declare variables used in COMPUTE_STEP
+ __m256i P_32_A, P_32_B, P_32;
+
+#define COMPUTE_STEP(R_INPUT_A, R_INPUT_B, OFFSET) \
+ P_32_A = _mm256_madd_epi16(R_INPUT_A, L_AD0); \
+ P_32_B = _mm256_madd_epi16(R_INPUT_B, L_AD8); \
+ P_32 = _mm256_add_epi32(P_32_A, P_32_B); \
+ _mm256_store_si256( \
+ blockO_256 + 2 * OFFSET, \
+ _mm256_add_epi32(_mm256_load_si256(blockO_256 + 2 * OFFSET), P_32)); \
+ \
+ P_32_A = _mm256_madd_epi16(R_INPUT_A, L_EH0); \
+ P_32_B = _mm256_madd_epi16(R_INPUT_B, L_EH8); \
+ P_32 = _mm256_add_epi32(P_32_A, P_32_B); \
+ _mm256_store_si256( \
+ blockO_256 + 2 * OFFSET + 1, \
+ _mm256_add_epi32(_mm256_load_si256(blockO_256 + 2 * OFFSET + 1), P_32));
+
+ // Permute and shuffle to copy a single value across the entire vector
+ // Then compute the multiplication
+ // Replicate lower 128-bits of R_AH0 across both lanes
+ __m256i R_AH0_ = _mm256_permute2x128_si256(R_AH0, R_AH0, 0x00);
+ // Copy first two elements of R_AH0 across entire vector
+ __m256i R_AD0 = _mm256_shuffle_epi32(R_AH0_, 0x00);
+ // Copy second two elements of R_AH0 across entire vector
+ __m256i R_EH0 = _mm256_shuffle_epi32(R_AH0_, 0x55);
+
+ COMPUTE_STEP(R_AD0, R_EH0, 0);
+ __m256i R_AD1 = _mm256_shuffle_epi32(R_AH0_, 0xAA);
+ __m256i R_EH1 = _mm256_shuffle_epi32(R_AH0_, 0xFF);
+ COMPUTE_STEP(R_AD1, R_EH1, 1);
+
+ // Replicate upper 128-bits of R_AH0 across both lanes
+ R_AH0_ = _mm256_permute2x128_si256(R_AH0, R_AH0, 0x11);
+ __m256i R_AD2 = _mm256_shuffle_epi32(R_AH0_, 0x00);
+ __m256i R_EH2 = _mm256_shuffle_epi32(R_AH0_, 0x55);
+ COMPUTE_STEP(R_AD2, R_EH2, 2);
+ __m256i R_AD3 = _mm256_shuffle_epi32(R_AH0_, 0xAA);
+ __m256i R_EH3 = _mm256_shuffle_epi32(R_AH0_, 0xFF);
+ COMPUTE_STEP(R_AD3, R_EH3, 3);
+
+ R_AH0_ = _mm256_permute2x128_si256(R_AH4, R_AH4, 0x00);
+ R_AD0 = _mm256_shuffle_epi32(R_AH0_, 0x00);
+ R_EH0 = _mm256_shuffle_epi32(R_AH0_, 0x55);
+ COMPUTE_STEP(R_AD0, R_EH0, 4);
+ R_AD1 = _mm256_shuffle_epi32(R_AH0_, 0xAA);
+ R_EH1 = _mm256_shuffle_epi32(R_AH0_, 0xFF);
+ COMPUTE_STEP(R_AD1, R_EH1, 5);
+ R_AH0_ = _mm256_permute2x128_si256(R_AH4, R_AH4, 0x11);
+ R_AD2 = _mm256_shuffle_epi32(R_AH0_, 0x00);
+ R_EH2 = _mm256_shuffle_epi32(R_AH0_, 0x55);
+ COMPUTE_STEP(R_AD2, R_EH2, 6);
+ R_AD3 = _mm256_shuffle_epi32(R_AH0_, 0xAA);
+ R_EH3 = _mm256_shuffle_epi32(R_AH0_, 0xFF);
+ COMPUTE_STEP(R_AD3, R_EH3, 7);
+
+ R_AH0_ = _mm256_permute2x128_si256(R_AH8, R_AH8, 0x00);
+ R_AD0 = _mm256_shuffle_epi32(R_AH0_, 0x00);
+ R_EH0 = _mm256_shuffle_epi32(R_AH0_, 0x55);
+ COMPUTE_STEP(R_AD0, R_EH0, 8);
+ R_AD1 = _mm256_shuffle_epi32(R_AH0_, 0xAA);
+ R_EH1 = _mm256_shuffle_epi32(R_AH0_, 0xFF);
+ COMPUTE_STEP(R_AD1, R_EH1, 9);
+ R_AH0_ = _mm256_permute2x128_si256(R_AH8, R_AH8, 0x11);
+ R_AD2 = _mm256_shuffle_epi32(R_AH0_, 0x00);
+ R_EH2 = _mm256_shuffle_epi32(R_AH0_, 0x55);
+ COMPUTE_STEP(R_AD2, R_EH2, 10);
+ R_AD3 = _mm256_shuffle_epi32(R_AH0_, 0xAA);
+ R_EH3 = _mm256_shuffle_epi32(R_AH0_, 0xFF);
+ COMPUTE_STEP(R_AD3, R_EH3, 11);
+
+ R_AH0_ = _mm256_permute2x128_si256(R_AH12, R_AH12, 0x00);
+ R_AD0 = _mm256_shuffle_epi32(R_AH0_, 0x00);
+ R_EH0 = _mm256_shuffle_epi32(R_AH0_, 0x55);
+ COMPUTE_STEP(R_AD0, R_EH0, 12);
+ R_AD1 = _mm256_shuffle_epi32(R_AH0_, 0xAA);
+ R_EH1 = _mm256_shuffle_epi32(R_AH0_, 0xFF);
+ COMPUTE_STEP(R_AD1, R_EH1, 13);
+ R_AH0_ = _mm256_permute2x128_si256(R_AH12, R_AH12, 0x11);
+ R_AD2 = _mm256_shuffle_epi32(R_AH0_, 0x00);
+ R_EH2 = _mm256_shuffle_epi32(R_AH0_, 0x55);
+ COMPUTE_STEP(R_AD2, R_EH2, 14);
+ R_AD3 = _mm256_shuffle_epi32(R_AH0_, 0xAA);
+ R_EH3 = _mm256_shuffle_epi32(R_AH0_, 0xFF);
+ COMPUTE_STEP(R_AD3, R_EH3, 15);
+
+#undef COMPUTE_STEP
+ }
+
+ // Transfer the results to the result matrix
+ Index i = 0;
+ for (Index j = n; j < n + 16; j++) {
+ LinearMapper r0 = res.getLinearMapper(m, j);
+ LinearMapper r1 = res.getLinearMapper(m + 8, j);
+
+ r0.storePacket(0, _mm256_add_epi32(blockO_256[i++], r0.loadPacket(0)));
+ r1.storePacket(0, _mm256_add_epi32(blockO_256[i++], r1.loadPacket(0)));
+ }
+
+ // Zero the result block so it can be reused
+ memset(blockO, 0, 16 * 16 * sizeof(QInt32));
+ }
+ }
+ aligned_delete(blockO, 16 * 16);
+}
+
+#endif
+
+// AVX2 optimized implementation of Mat-Mat product.
// LHS is encoded using signed 8-bit integers.
// RHS is encoded using unsigned 8-bit integers.
#ifdef EIGEN_USE_OPTIMIZED_INT8_UINT8_MAT_MAT_PRODUCT
@@ -1751,4 +2227,4 @@ void gebp_kernel<QInt8, QUInt8, Index, DataMapper, mr, nr, ConjugateLhs, Conjuga
} // namespace internal
} // namespace Eigen
-#endif // EIGEN_CXX11_FIXED_POINT_MAT_MAT_PRODUCT_AVX2_H
+#endif // CXX11_SRC_FIXEDPOINT_MATMATPRODUCTAVX2_H_
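
The packing comments and the COMPUTE_STEP macro above both hinge on _mm256_madd_epi16 multiplying adjacent pairs of signed 16-bit elements and summing each pair into a 32-bit lane; that is why the packers keep adjacent depth elements of a row or column contiguous. A plain-C++ scalar model of that pairwise semantics (an illustrative sketch, not the intrinsic itself):

#include <cassert>
#include <cstdint>

// Scalar model of _mm256_madd_epi16: multiply 16 pairs of adjacent signed
// 16-bit elements and add each pair into one of 8 signed 32-bit results.
void madd_epi16_model(const int16_t a[16], const int16_t b[16],
                      int32_t out[8]) {
  for (int i = 0; i < 8; ++i) {
    out[i] = static_cast<int32_t>(a[2 * i]) * b[2 * i] +
             static_cast<int32_t>(a[2 * i + 1]) * b[2 * i + 1];
  }
}

int main() {
  int16_t a[16], b[16];
  for (int i = 0; i < 16; ++i) { a[i] = int16_t(i); b[i] = 2; }
  int32_t out[8];
  madd_epi16_model(a, b, out);
  assert(out[0] == 0 * 2 + 1 * 2);    // adjacent pair (a0, a1)
  assert(out[7] == 14 * 2 + 15 * 2);  // adjacent pair (a14, a15)
  return 0;
}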
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h
index 99894cafb5..9cd3157023 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h
@@ -8,9 +8,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef EIGEN_CXX11_FIXED_POINT_MAT_MAT_PRODUCT_NEON_H
-#define EIGEN_CXX11_FIXED_POINT_MAT_MAT_PRODUCT_NEON_H
-
+#ifndef CXX11_SRC_FIXEDPOINT_MATMATPRODUCTNEON_H_
+#define CXX11_SRC_FIXEDPOINT_MATMATPRODUCTNEON_H_
namespace Eigen {
namespace internal {
@@ -90,6 +89,4 @@ void gebp_kernel<QInt8, QUInt8, Index, DataMapper, mr, nr, ConjugateLhs, Conjuga
} // namespace internal
} // namespace Eigen
-
-
-#endif // EIGEN_CXX11_FIXED_POINT_MAT_MAT_PRODUCT_NEON_H
+#endif // CXX11_SRC_FIXEDPOINT_MATMATPRODUCTNEON_H_
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatVecProduct.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatVecProduct.h
index 18b5085b89..ad11d3d44b 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatVecProduct.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatVecProduct.h
@@ -7,9 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef EIGEN_CXX11_FIXED_POINT_MAT_VEC_PRODUCT_H
-#define EIGEN_CXX11_FIXED_POINT_MAT_VEC_PRODUCT_H
-
+#ifndef CXX11_SRC_FIXEDPOINT_MATVECPRODUCT_H_
+#define CXX11_SRC_FIXEDPOINT_MATVECPRODUCT_H_
namespace Eigen {
namespace internal {
@@ -47,6 +46,36 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,QInt8,LhsMapper,ColMa
}
}
+// Mat-Vec product
+// Both lhs and rhs are encoded as 16bit signed integers
+template <typename Index, typename LhsMapper, bool ConjugateLhs,
+ typename RhsMapper, bool ConjugateRhs, int Version>
+struct general_matrix_vector_product<Index, QInt16, LhsMapper, ColMajor,
+ ConjugateLhs, QInt16, RhsMapper,
+ ConjugateRhs, Version> {
+ EIGEN_DONT_INLINE static void run(Index rows, Index cols,
+ const LhsMapper& lhs, const RhsMapper& rhs,
+ QInt32* res, Index resIncr, QInt16 alpha);
+};
+
+template <typename Index, typename LhsMapper, bool ConjugateLhs,
+ typename RhsMapper, bool ConjugateRhs, int Version>
+EIGEN_DONT_INLINE void general_matrix_vector_product<
+ Index, QInt16, LhsMapper, ColMajor, ConjugateLhs, QInt16, RhsMapper,
+ ConjugateRhs, Version>::run(Index rows, Index cols, const LhsMapper& lhs,
+ const RhsMapper& rhs, QInt32* res,
+ Index resIncr, QInt16 alpha) {
+ eigen_assert(alpha.value == 1);
+ eigen_assert(resIncr == 1);
+ eigen_assert(rows > 0);
+ eigen_assert(cols > 0);
+
+ for (Index i = 0; i < rows; ++i) {
+ for (Index j = 0; j < cols; ++j) {
+ res[i] += lhs(i, j) * rhs(j, 0);
+ }
+ }
+}
// Mat-Vec product
// The lhs is encoded using 8bit signed integers, the rhs using 8bit unsigned integers
@@ -118,6 +147,4 @@ EIGEN_DONT_INLINE void general_matrix_vector_product<Index,QUInt8,LhsMapper,ColM
} // namespace internal
} // namespace Eigen
-
-
-#endif // EIGEN_CXX11_FIXED_POINT_MAT_VEC_PRODUCT_H
+#endif // CXX11_SRC_FIXEDPOINT_MATVECPRODUCT_H_
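
A plain-C++ model of what the new QInt16 general_matrix_vector_product::run computes: a column-major mat-vec with 32-bit accumulation and alpha fixed at 1 (built-in types stand in for QInt16/QInt32; the sample values are made up):

#include <cassert>
#include <cstdint>

// Column-major mat-vec: res[i] += sum_j lhs(i, j) * rhs(j), accumulated
// in 32 bits, mirroring the reference loop above.
void matvec_qint16_model(int rows, int cols,
                         const int16_t* lhs /* column-major */,
                         const int16_t* rhs, int32_t* res) {
  for (int i = 0; i < rows; ++i)
    for (int j = 0; j < cols; ++j)
      res[i] += static_cast<int32_t>(lhs[i + j * rows]) * rhs[j];
}

int main() {
  // 2x2 column-major matrix {{1, 2}, {3, 4}} times vector {10, 100}.
  const int16_t lhs[4] = {1, 3, 2, 4};  // columns: (1,3) then (2,4)
  const int16_t rhs[2] = {10, 100};
  int32_t res[2] = {0, 0};
  matvec_qint16_model(2, 2, lhs, rhs, res);
  assert(res[0] == 1 * 10 + 2 * 100);  // 210
  assert(res[1] == 3 * 10 + 4 * 100);  // 430
  return 0;
}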
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
index cb1636256d..3abd4ee49c 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX2.h
@@ -1,6 +1,5 @@
-#ifndef EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX2_H_
-#define EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX2_H_
-
+#ifndef CXX11_SRC_FIXEDPOINT_PACKETMATHAVX2_H_
+#define CXX11_SRC_FIXEDPOINT_PACKETMATHAVX2_H_
#ifdef _MSC_VER
#include <immintrin.h>
@@ -29,7 +28,6 @@ inline int _mm256_extract_epi8_N1(const __m256i X)
return _mm_extract_epi8(_mm256_extractf128_si256((X), 1 >> 4), 1 % 16);
}
-
namespace Eigen {
namespace internal {
@@ -502,4 +500,4 @@ struct functor_traits<scalar_product_op<QInt32, double>> {
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX2_H_
+#endif // CXX11_SRC_FIXEDPOINT_PACKETMATHAVX2_H_
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h
index 8f9906dbf9..2092ce1d4c 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/PacketMathAVX512.h
@@ -1,5 +1,5 @@
-#ifndef EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_
-#define EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_
+#ifndef CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_
+#define CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_
#include "PacketMathAVX2.h"
@@ -542,4 +542,4 @@ EIGEN_STRONG_INLINE QInt8 predux_max<Packet64q8i>(const Packet64q8i& a) {
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_
+#endif // CXX11_SRC_FIXEDPOINT_PACKETMATHAVX512_H_
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h
index 7b4ecc752f..9561d6a338 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX2.h
@@ -1,5 +1,5 @@
-#ifndef EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_
-#define EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_
+#ifndef CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_
+#define CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_
namespace Eigen {
namespace internal {
@@ -52,8 +52,16 @@ template <>
EIGEN_STRONG_INLINE Packet32q8u
pcast<Packet8q32i, Packet32q8u>(const Packet8q32i& a, const Packet8q32i& b,
const Packet8q32i& c, const Packet8q32i& d) {
+ // _mm256_packus_epi32 trims negative numbers to 0 but we can't allow numbers
+ // that are too large because _mm256_packus_epi16 expects signed input
+ // (example of problem input: 0x11111111, which saturates to 0xffff = -1,
+ // which saturates to 0).
+ const __m256i a_clip = _mm256_min_epi32(a, _mm256_set1_epi32(255));
+ const __m256i b_clip = _mm256_min_epi32(b, _mm256_set1_epi32(255));
+ const __m256i c_clip = _mm256_min_epi32(c, _mm256_set1_epi32(255));
+ const __m256i d_clip = _mm256_min_epi32(d, _mm256_set1_epi32(255));
const __m256i converted = _mm256_packus_epi16(
- _mm256_packs_epi32(a.val, b.val), _mm256_packs_epi32(c.val, d.val));
+ _mm256_packus_epi32(a_clip, b_clip), _mm256_packus_epi32(c_clip, d_clip));
// Since packus does not cross 128 bit lane boundaries,
// we have to permute to properly order the final result.
const __m256i permute_mask = _mm256_set_epi32(7, 3, 6, 2, 5, 1, 4, 0);
@@ -63,4 +71,4 @@ pcast<Packet8q32i, Packet32q8u>(const Packet8q32i& a, const Packet8q32i& b,
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_
+#endif // CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX2_H_
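
A scalar sketch of the saturation chain the clip above fixes (plain C++, not the intrinsics): packus_epi32 saturates int32 to uint16, but packus_epi16 then reads those bits as *signed* int16, so a large positive input can collapse to 0 unless it is first clipped to 255.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Two-stage pack without the fix. The reinterpretation as signed assumes
// two's complement, as on x86.
uint8_t two_stage_pack(int32_t v) {
  const uint16_t u16 = static_cast<uint16_t>(
      std::min<int32_t>(std::max<int32_t>(v, 0), 65535));
  const int16_t as_signed = static_cast<int16_t>(u16);  // packus_epi16 input
  return static_cast<uint8_t>(
      std::min<int16_t>(std::max<int16_t>(as_signed, 0), 255));
}

int main() {
  // Problem input from the comment above: 0x11111111 saturates to 0xffff,
  // which packus_epi16 then reads as -1 and clamps to 0.
  assert(two_stage_pack(0x11111111) == 0);
  // Clipping to 255 first (what _mm256_min_epi32 does in the patch) fixes it.
  assert(two_stage_pack(std::min(0x11111111, 255)) == 255);
  return 0;
}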
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h
index 26735743d4..a09eac6707 100644
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h
+++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/TypeCastingAVX512.h
@@ -1,5 +1,5 @@
-#ifndef EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_
-#define EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_
+#ifndef CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_
+#define CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_
namespace Eigen {
namespace internal {
@@ -132,8 +132,15 @@ pcast<Packet16q32i, Packet64q8i>(const Packet16q32i& a,
const Packet16q32i& b,
const Packet16q32i& c,
const Packet16q32i& d) {
- __m512i converted = _mm512_packs_epi16(_mm512_packs_epi32(a.val, b.val),
- _mm512_packs_epi32(c.val, d.val));
+ __m128i a_part = _mm512_cvtsepi32_epi8(a);
+ __m128i b_part = _mm512_cvtsepi32_epi8(b);
+ __m128i c_part = _mm512_cvtsepi32_epi8(c);
+ __m128i d_part = _mm512_cvtsepi32_epi8(d);
+ __m256i ab =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(a_part), b_part, 1);
+ __m256i cd =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(c_part), d_part, 1);
+ __m512i converted = _mm512_inserti64x4(_mm512_castsi256_si512(ab), cd, 1);
return converted;
}
@@ -141,7 +148,10 @@ template <>
EIGEN_STRONG_INLINE Packet32q16i
pcast<Packet16q32i, Packet32q16i>(const Packet16q32i& a,
const Packet16q32i& b) {
- __m512i converted = _mm512_packs_epi32(a.val, b.val);
+ __m256i a_part = _mm512_cvtsepi32_epi16(a);
+ __m256i b_part = _mm512_cvtsepi32_epi16(b);
+ __m512i converted =
+ _mm512_inserti64x4(_mm512_castsi256_si512(a_part), b_part, 1);
return converted;
}
@@ -154,22 +164,45 @@ template <>
EIGEN_STRONG_INLINE Packet64q8u
pcast<Packet16q32i, Packet64q8u>(const Packet16q32i& a, const Packet16q32i& b,
const Packet16q32i& c, const Packet16q32i& d) {
- const __m512i converted = _mm512_packus_epi16(
- _mm512_packus_epi32(a.val, b.val), _mm512_packus_epi32(c.val, d.val));
+ // Brute-force saturation since there isn't a pack operation for unsigned
+ // numbers that keeps the elements in order.
+ __m128i a_part = _mm512_cvtepi32_epi8(_mm512_max_epi32(
+ _mm512_min_epi32(a, _mm512_set1_epi32(255)), _mm512_setzero_si512()));
+ __m128i b_part = _mm512_cvtepi32_epi8(_mm512_max_epi32(
+ _mm512_min_epi32(b, _mm512_set1_epi32(255)), _mm512_setzero_si512()));
+ __m128i c_part = _mm512_cvtepi32_epi8(_mm512_max_epi32(
+ _mm512_min_epi32(c, _mm512_set1_epi32(255)), _mm512_setzero_si512()));
+ __m128i d_part = _mm512_cvtepi32_epi8(_mm512_max_epi32(
+ _mm512_min_epi32(d, _mm512_set1_epi32(255)), _mm512_setzero_si512()));
+ __m256i ab =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(a_part), b_part, 1);
+ __m256i cd =
+ _mm256_inserti128_si256(_mm256_castsi128_si256(c_part), d_part, 1);
+ __m512i converted = _mm512_inserti64x4(_mm512_castsi256_si512(ab), cd, 1);
return converted;
}
+#if 0
+// The type Packet32q16u does not exist for AVX-512 yet
template <>
struct type_casting_traits<QInt32, QUInt16> {
enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
};
-#if 0
template <>
EIGEN_STRONG_INLINE Packet32q16u
pcast<Packet16q32i, Packet32q16u>(const Packet16q32i& a,
const Packet16q32i& b) {
- const __m512i converted = _mm512_packus_epi32(a.val, b.val);
+ // Brute-force saturation since there isn't a pack operation for unsigned
+ // numbers that keeps the elements in order.
+ __m256i a_part =
+ _mm512_cvtepi32_epi16(_mm512_max_epi32(
+ _mm512_min_epi32(a, _mm512_set1_epi32(65535)), _mm512_setzero_si512()));
+ __m256i b_part = _mm512_cvtepi32_epi16(
+ _mm512_max_epi32(_mm512_min_epi32(b, _mm512_set1_epi32(65535)),
+ _mm512_setzero_si512()));
+ __m512i converted =
+ _mm512_inserti64x4(_mm512_castsi256_si512(a_part), b_part, 1);
return converted;
}
#endif
@@ -177,4 +210,4 @@ pcast<Packet16q32i, Packet32q16u>(const Packet16q32i& a,
} // end namespace internal
} // end namespace Eigen
-#endif // EIGEN3_UNSUPPORTED_EIGEN_CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_
+#endif // CXX11_SRC_FIXEDPOINT_TYPECASTINGAVX512_H_
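
The AVX-512 unsigned paths above all reduce to the same scalar rule: clamp each 32-bit lane into the unsigned target range with min/max, then narrow. Unlike the pack instructions, this keeps elements in their original order, which is why the patch pays for the extra min/max. A minimal plain-C++ model of the per-lane saturation:

#include <algorithm>
#include <cassert>
#include <cstdint>

// Per-lane model of min(max(v, 0), 255) followed by truncation to 8 bits.
uint8_t saturate_u8(int32_t v) {
  return static_cast<uint8_t>(std::max(std::min(v, 255), 0));
}

int main() {
  assert(saturate_u8(-7) == 0);     // negatives clamp to zero
  assert(saturate_u8(300) == 255);  // overflow clamps to 255
  assert(saturate_u8(42) == 42);    // in-range values pass through
  return 0;
}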
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Activations.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Activations.h
deleted file mode 100644
index cbcce9e282..0000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Activations.h
+++ /dev/null
@@ -1,116 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef EIGEN_CXX11_NEURAL_NETWORKS_ACTIVATIONS_H
-#define EIGEN_CXX11_NEURAL_NETWORKS_ACTIVATIONS_H
-
-namespace Eigen {
-
-/** scalar_sigmoid_fast_derivative_op
- * \ingroup CXX11_NeuralNetworks_Module
- * \brief Template functor to compute the fast derivative of a sigmoid
- *
- * Input should be the backpropagated gradient.
- *
- * \sa class CwiseUnaryOp, Cwise::sigmoid_fast_derivative()
- */
-template <typename T>
-struct scalar_sigmoid_fast_derivative_op {
- EIGEN_EMPTY_STRUCT_CTOR(scalar_sigmoid_fast_derivative_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& y) const {
- const T one = T(1);
- return (one - y) * y;
- }
-
- template <typename Packet>
- inline Packet packetOp(const Packet& y) const {
- const Packet one = internal::pset1<Packet>(1);
- return internal::pmul(internal::psub(one, y), y);
- }
-};
-
-namespace internal {
-template <typename T>
-struct functor_traits<scalar_sigmoid_fast_derivative_op<T> > {
- enum {
- Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost,
- PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasMul &&
- packet_traits<T>::HasNegate
- };
-};
-} // namespace internal
-
-/** scalar_tanh_fast_derivative_op
- * \ingroup CXX11_NeuralNetworks_Module
- * \brief Template functor to compute the fast derivative of a tanh
- *
- * Input should be the backpropagated gradient.
- *
- * \sa class CwiseUnaryOp, Cwise::tanh_fast_derivative()
- */
-template <typename T>
-struct scalar_tanh_fast_derivative_op {
- EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_fast_derivative_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T operator()(const T& y) const {
- const T one = T(1);
- return one - (y * y);
- }
-
- template <typename Packet>
- inline Packet packetOp(const Packet& y) const {
- const Packet one = internal::pset1<Packet>(1);
- return internal::psub(one, internal::pmul(y, y));
- }
-};
-
-namespace internal {
-template <typename T>
-struct functor_traits<scalar_tanh_fast_derivative_op<T> > {
- enum {
- Cost = NumTraits<T>::AddCost * 2 + NumTraits<T>::MulCost * 1,
- PacketAccess = packet_traits<T>::HasAdd && packet_traits<T>::HasMul &&
- packet_traits<T>::HasNegate
- };
-};
-} // namespace internal
-
-/**
- * \ingroup CXX11_NeuralNetworks_Module
- * \brief Template functor to clip the magnitude of the first scalar.
- *
- * \sa class CwiseBinaryOp, MatrixBase::Clip
- */
-template <typename Scalar>
-struct scalar_clip_op {
- EIGEN_EMPTY_STRUCT_CTOR(scalar_clip_op)
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar
- operator()(const Scalar& a, const Scalar& b) const {
- return numext::mini(numext::maxi(a, -b), b);
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Packet
- packetOp(const Packet& a, const Packet& b) const {
- return internal::pmin(internal::pmax(a, internal::pnegate(b)), b);
- }
-};
-
-namespace internal {
-template <typename Scalar>
-struct functor_traits<scalar_clip_op<Scalar> > {
- enum {
- Cost = NumTraits<Scalar>::AddCost * 3,
- PacketAccess = packet_traits<Scalar>::HasMax &&
- packet_traits<Scalar>::HasMin &&
- packet_traits<Scalar>::HasNegate
- };
-};
-} // namespace internal
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_NEURAL_NETWORKS_ACTIVATIONS_H
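
For reference, the functors deleted above compute the activation derivatives from the activation *output* y rather than the input x, which is what makes them "fast". A small plain-C++ restatement checking them against the analytic derivatives (a sketch, not the Eigen functors themselves):

#include <cassert>
#include <cmath>

// Derivatives expressed in terms of the already-computed activation y.
double sigmoid_fast_derivative(double y) { return (1.0 - y) * y; }
double tanh_fast_derivative(double y) { return 1.0 - y * y; }

int main() {
  const double x = 0.5;

  // sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))
  const double sy = 1.0 / (1.0 + std::exp(-x));
  assert(std::abs(sigmoid_fast_derivative(sy) - sy * (1.0 - sy)) < 1e-12);

  // tanh'(x) = 1 - tanh(x)^2 = 1 / cosh(x)^2
  const double ty = std::tanh(x);
  const double expected = 1.0 / (std::cosh(x) * std::cosh(x));
  assert(std::abs(tanh_fast_derivative(ty) - expected) < 1e-12);
  return 0;
}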
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Attention.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Attention.h
deleted file mode 100644
index d4bc7a3515..0000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Attention.h
+++ /dev/null
@@ -1,209 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef EIGEN_CXX11_NEURAL_NETWORKS_ATTENTION_H
-#define EIGEN_CXX11_NEURAL_NETWORKS_ATTENTION_H
-
-namespace Eigen {
-
-/** ExtractGlimpses
- * \ingroup CXX11_NeuralNetworks_Module
- *
- * \brief Extract glimpses from an input tensor.
- *
- * The input parameter is expected to be a col-major tensor with a rank of 4 (depth, x, y, and batch).
- * The width and height parameters specify the extension of the returned glimpses.
- * The offsets parameter specifies the x, y locations of the center of the glimpses relative to the center of the input image. The vector is expected to contain one IndexPair for each image in the batch dimension.
- * The normalized boolean indicates if incoming coordinates are normalized so that 0.0 and 1.0 correspond to the minimum and maximum of each height and width dimension.
- * The centered boolean indicates if incoming coordinates are centered relative to the image, in which case -1.0 and 1.0 correspond to minimum and maximum of each dimension while 0.0 corresponds to the center.
- *
- * The result can be assigned to a tensor of rank equal to that of the input. The result will be laid out in col-major order (depth, x, y, batch).
- * The dimensions of the result will be equal to the dimensions of the input except for width and height which will be equal to the requested glimpse size.
- */
-namespace {
-template <typename Index>
-struct GlimpseExtractionOp {
- GlimpseExtractionOp(const Index width, const Index height,
- const std::vector<IndexPair<float> >& offsets,
- const bool normalized,
- const bool centered,
- const bool uniform_noise) :
- width_(width), height_(height), offsets_(offsets),
- normalized_(normalized), centered_(centered), uniform_noise_(uniform_noise) { }
-
- template <typename Input>
- DSizes<Index, 4> dimensions(const Input& input) const {
- typedef typename internal::traits<Input>::Index IndexType;
- typedef TensorRef<Tensor<typename internal::traits<Input>::Scalar, 4,
- internal::traits<Input>::Layout, IndexType> > Ref;
- Ref in(input);
-
- DSizes<Index, 4> dims = in.dimensions();
-
- dims[0] = in.dimension(0);
- dims[1] = width_;
- dims[2] = height_;
- dims[3] = in.dimension(3);
- return dims;
- }
-
- template <typename Input, typename Output, typename Device>
- EIGEN_DEVICE_FUNC
- void eval(const Input& input, Output& output, const Device& device) const
- {
- typedef typename internal::traits<Input>::Index IndexType;
- typedef TensorRef<Tensor<typename internal::traits<Input>::Scalar, 4,
- internal::traits<Input>::Layout, IndexType> > Ref;
- Ref in(input);
-
- const Index num_channels = in.dimension(0);
- const Index input_width = in.dimension(1);
- const Index input_height = in.dimension(2);
- const Index batch_size = in.dimension(3);
- eigen_assert(input_width > 0);
- eigen_assert(input_height > 0);
-
- for (Index i = 0; i < batch_size; ++i) {
- float x = offsets_[i].first, y = offsets_[i].second;
-
- // Un-normalize coordinates back to pixel space if normalized.
- if (normalized_) {
- x *= input_width;
- y *= input_height;
- }
- // Un-center if coordinates are centered on the image center.
- if (centered_) {
- x /= 2.0f;
- y /= 2.0f;
- x += input_width / 2.0f;
- y += input_height / 2.0f;
- }
- // Remove half of the glimpse window.
- x -= width_ / 2.0f;
- y -= height_ / 2.0f;
-
- const Index offset_x = (Index) x;
- const Index offset_y = (Index) y;
- Index glimpse_width = width_;
- Index glimpse_height = height_;
- bool partial_overlap = false;
- DSizes<Index, 3> slice_offset(0, offset_x, offset_y);
- DSizes<Index, 3> slice_extent(num_channels, width_, height_);
- DSizes<Index, 3> base_offset(0, 0, 0);
-
- if (offset_x < 0) {
- slice_offset[1] = 0;
- glimpse_width = (std::max<Index>)(0, width_ + offset_x);
- slice_extent[1] = glimpse_width;
- base_offset[1] = width_ - glimpse_width;
- partial_overlap = true;
- } else if (offset_x + width_ >= input_width) {
- glimpse_width = (std::max<Index>)(0, input_width - offset_x);
- slice_extent[1] = glimpse_width;
- partial_overlap = true;
- }
- if (offset_y < 0) {
- slice_offset[2] = 0;
- glimpse_height = (std::max<Index>)(0, height_ + offset_y);
- slice_extent[2] = glimpse_height;
- base_offset[2] = height_ - glimpse_height;
- partial_overlap = true;
- } else if (offset_y + height_ >= input_height) {
- glimpse_height = (std::max<Index>)(0, input_height - offset_y);
- slice_extent[2] = glimpse_height;
- partial_overlap = true;
- }
- slice_extent[1] = std::min<Index>(input_width, slice_extent[1]);
- slice_extent[2] = std::min<Index>(input_height, slice_extent[2]);
-
- if (partial_overlap) {
- if (uniform_noise_) {
- // Initialize the glimpse with uniform noise.
- typedef typename internal::remove_const<
- typename internal::traits<Input>::Scalar>::type Scalar;
- TensorFixedSize<Scalar, Sizes<> > mini;
- mini.device(device) = input.template chip<3>(i).minimum();
- TensorFixedSize<float, Sizes<> > range;
- range.device(device) =
- (input.template chip<3>(i).maximum() - mini).template cast<float>();
-
- DSizes<Index, 3> glimpse_size(num_channels, width_, height_);
- TensorMap<Tensor<float, 3> > tmp(NULL, glimpse_size);
- output.template chip<3>(i).device(device) =
- mini.reshape(Sizes<1,1,1>()).broadcast(glimpse_size) +
- (tmp.random() * range.reshape(Sizes<1,1,1>()).broadcast(glimpse_size)).template cast<Scalar>();
- } else {
- // Initialize the glimpse with white noise: compute the mean and sigma
- // of each channel, and use them to shape the gaussian.
- DSizes<Index, 2> glimpse_size(width_, height_);
- DSizes<Index, 2> input_size(input_width, input_height);
- typedef typename internal::remove_const<
- typename internal::traits<Input>::Scalar>::type Scalar;
-
- for (int j = 0; j < num_channels; ++j) {
- TensorFixedSize<Scalar, Sizes<> > mean;
- mean.device(device) = input.template chip<3>(i).template chip<0>(j).template cast<float>().mean();
- TensorFixedSize<float, Sizes<> > sigma;
- sigma.device(device) =
- (input.template chip<3>(i).template chip<0>(j).template cast<float>() - mean.reshape(Sizes<1,1>()).broadcast(input_size)).square().mean().sqrt();
- TensorFixedSize<Scalar, Sizes<> > mini;
- mini.device(device) = input.template chip<3>(i).template chip<0>(j).minimum();
- TensorFixedSize<float, Sizes<> > maxi;
- maxi.device(device) = input.template chip<3>(i).template chip<0>(j).maximum();
-
- TensorMap<Tensor<float, 2> > tmp(NULL, glimpse_size);
- output.template chip<3>(i).template chip<0>(j).device(device) =
- (mean.reshape(Sizes<1,1>()).broadcast(glimpse_size) +
- (tmp.random(internal::NormalRandomGenerator<float>()) * sigma.reshape(Sizes<1,1>()).broadcast(glimpse_size)).template cast<Scalar>()).cwiseMin(maxi.reshape(Sizes<1,1>()).broadcast(glimpse_size)).cwiseMax(mini.reshape(Sizes<1,1>()).broadcast(glimpse_size));
- }
- }
-
- // Copy the part of the glimpse that cover the input image if any.
- if (glimpse_width == 0 || glimpse_height == 0) {
- continue;
- }
- output.template chip<3>(i).slice(base_offset, slice_extent).device(device) = input.template chip<3>(i).slice(slice_offset, slice_extent);
- } else {
- output.template chip<3>(i).device(device) = input.template chip<3>(i).slice(slice_offset, slice_extent);
- }
- }
- }
-
- private:
- const Index width_;
- const Index height_;
- const std::vector<IndexPair<float> > offsets_;
- const bool normalized_;
- const bool centered_;
- const bool uniform_noise_;
-};
-}
-
-
-template <typename Input>
-EIGEN_ALWAYS_INLINE
-static const TensorCustomUnaryOp<const GlimpseExtractionOp<typename internal::traits<Input>::Index>, const Input>
-ExtractGlimpses(const Input& input,
- const typename internal::traits<Input>::Index width,
- const typename internal::traits<Input>::Index height,
- const std::vector<IndexPair<float> >& offsets,
- const bool normalized = true, const bool centered = true,
- const bool uniform_noise = true)
-{
- EIGEN_STATIC_ASSERT(internal::traits<Input>::Layout == ColMajor, YOU_MADE_A_PROGRAMMING_MISTAKE);
- EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4, YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- typedef typename internal::traits<Input>::Index Index;
- const GlimpseExtractionOp<Index> op(width, height, offsets, normalized,
- centered, uniform_noise);
- return input.customOp(op);
-}
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_NEURAL_NETWORKS_ATTENTION_H
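
The coordinate handling in the deleted GlimpseExtractionOp::eval is the subtle part: normalized offsets are scaled back to pixels, centered offsets are mapped from [-1, 1] onto the image, and half the glimpse window is subtracted to get the top-left corner. A plain-C++ restatement of that arithmetic for one axis (a sketch; the function name is illustrative):

#include <cassert>

// Map a (possibly normalized, possibly centered) offset to the top-left
// pixel of the glimpse window along one axis, mirroring eval() above.
int glimpse_origin(float offset, int input_extent, int glimpse_extent,
                   bool normalized, bool centered) {
  float x = offset;
  if (normalized) x *= input_extent;  // fractions -> pixels
  if (centered) {
    x /= 2.0f;                        // [-extent, extent] -> [-extent/2, extent/2]
    x += input_extent / 2.0f;         // re-center on the image middle
  }
  x -= glimpse_extent / 2.0f;         // shift to the window's top-left corner
  return static_cast<int>(x);
}

int main() {
  // A centered, normalized offset of 0.0 on a 100-pixel axis puts a
  // 20-pixel glimpse at pixel 40, i.e. centered on the image.
  assert(glimpse_origin(0.0f, 100, 20, /*normalized=*/true,
                        /*centered=*/true) == 40);
  return 0;
}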
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardCuboidConvolutions.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardCuboidConvolutions.h
deleted file mode 100644
index 12ce23444c..0000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardCuboidConvolutions.h
+++ /dev/null
@@ -1,523 +0,0 @@
-#ifndef EIGEN_CXX11_NEURAL_NETWORKS_BACKWARD_CUBOID_CONVOLUTIONS_H
-#define EIGEN_CXX11_NEURAL_NETWORKS_BACKWARD_CUBOID_CONVOLUTIONS_H
-
-#include "Patch3d.h"
-
-namespace Eigen {
-
-/** CuboidConvolutionBackwardInput
- * \ingroup CXX11_NeuralNetworks_Module
- *
- * \brief Computes the backprop for the input of a 3D convolution.
- *
- * The output_backward parameter is expected to be a tensor with a rank of 4 or more (channels, depth, height, width, and optionally others)
- * The kernel parameter is expected to be a 5D tensor (filters, channels, kernel_depth, kernel_height, kernel_width)
- * output_backward and kernel have to be in the same layout.
- *
- * The dimensions of the result will be filters, depth, height, width (and others if applicable).
- *
- * It is possible to swap the order of the depth, width and height dimensions provided that the same order is used in the input, the kernel, and the output.
- *
- * All dimension orders above are given for col-major, and should be reversed for row-major.
- */
-
-template <typename OutputBackward, typename Kernel>
-EIGEN_ALWAYS_INLINE static const typename internal::conditional<
- internal::traits<OutputBackward>::Layout == ColMajor,
- TensorReshapingOp<
- const DSizes<typename internal::traits<OutputBackward>::Index,
- internal::traits<OutputBackward>::NumDimensions>,
- const TensorContractionOp<
- const array< IndexPair<typename internal::traits<OutputBackward>::Index>, 2>,
- const TensorReshapingOp<
- const DSizes< typename internal::traits<OutputBackward>::Index, 3>,
- const TensorReverseOp<const array<bool, 5>, const Kernel>
- >,
- const TensorReshapingOp<
- const DSizes< typename internal::traits<OutputBackward>::Index, 3>,
- const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const OutputBackward>
- >
- >
- >,
- TensorReshapingOp<
- const DSizes<typename internal::traits<OutputBackward>::Index,
- internal::traits<OutputBackward>::NumDimensions>,
- const TensorContractionOp<
- const array< IndexPair<typename internal::traits<OutputBackward>::Index>, 2>,
- const TensorReshapingOp<
- const DSizes< typename internal::traits<OutputBackward>::Index, 3>,
- const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const OutputBackward>
- >,
- const TensorReshapingOp<
- const DSizes<typename internal::traits<OutputBackward>::Index, 3>,
- const TensorReverseOp<const array<bool, 5>, const Kernel>
- >
- >
- >
->::type
-CuboidConvolutionBackwardInput(
- const Kernel& kernel, const OutputBackward& output_backward,
- typename internal::traits<OutputBackward>::Index inputPlanes,
- typename internal::traits<OutputBackward>::Index inputRows,
- typename internal::traits<OutputBackward>::Index inputCols,
- const DenseIndex stridePlanes = 1, const DenseIndex strideRows = 1,
- const DenseIndex strideCols = 1) {
- typedef typename internal::traits<OutputBackward>::Index TensorIndex;
- const TensorRef<const Tensor<typename internal::traits<Kernel>::Scalar, internal::traits<Kernel>::NumDimensions, internal::traits<Kernel>::Layout, TensorIndex> > kern(kernel);
- const TensorRef<const Tensor<typename internal::traits<OutputBackward>::Scalar, internal::traits<OutputBackward>::NumDimensions, internal::traits<OutputBackward>::Layout, TensorIndex> > out(output_backward);
-
- EIGEN_STATIC_ASSERT(internal::traits<Kernel>::Layout == internal::traits<OutputBackward>::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- static const bool isColMajor = (internal::traits<OutputBackward>::Layout == ColMajor);
-
- static const int NumDims = internal::traits<OutputBackward>::NumDimensions;
-
- // Number of filters to apply. This is the same as the output depth of the result
- const TensorIndex kernelFilters = isColMajor ? kern.dimensions()[0] : kern.dimensions()[4];
- // Number of channels. This is the same as the input depth.
- const TensorIndex kernelChannels = isColMajor ? kern.dimensions()[1] : kern.dimensions()[3];
-  const TensorIndex kernelPlanes = kern.dimensions()[2];  // Dim 2 is the same in both layouts.
- const TensorIndex kernelRows = isColMajor ? kern.dimensions()[3] : kern.dimensions()[1];
- const TensorIndex kernelCols = isColMajor ? kern.dimensions()[4] : kern.dimensions()[0];
-
- const TensorIndex outputPlanes = isColMajor ? out.dimensions()[1] : out.dimensions()[NumDims - 2];
- const TensorIndex outputRows = isColMajor ? out.dimensions()[2] : out.dimensions()[NumDims - 3];
- const TensorIndex outputCols = isColMajor ? out.dimensions()[3] : out.dimensions()[NumDims - 4];
-
- TensorIndex forward_pad_z, forward_pad_y, forward_pad_x;
- const TensorIndex size_z = ceil(inputPlanes / static_cast<float>(stridePlanes));
- const TensorIndex size_y = ceil(inputRows / static_cast<float>(strideRows));
- const TensorIndex size_x = ceil(inputCols / static_cast<float>(strideCols));
-
- // Infer padding type.
- if (size_z == outputPlanes && size_y == outputRows && size_x == outputCols) {
- // SAME padding.
- const TensorIndex dz = size_z * stridePlanes + kernelPlanes - 1 - inputPlanes;
- const TensorIndex dy = size_y * strideRows + kernelRows - 1 - inputRows;
- const TensorIndex dx = size_x * strideCols + kernelCols - 1 - inputCols;
-
- forward_pad_z = dz - dz / 2;
- forward_pad_y = dy - dy / 2;
- forward_pad_x = dx - dx / 2;
- } else {
- // VALID padding.
- forward_pad_z = 0;
- forward_pad_y = 0;
- forward_pad_x = 0;
- }
- const TensorIndex padding_ztop = kernelPlanes - 1 - forward_pad_z;
- const TensorIndex padding_top = kernelRows - 1 - forward_pad_y;
- const TensorIndex padding_left = kernelCols - 1 - forward_pad_x;
-
- const TensorIndex padding_zbottom = inputPlanes + kernelPlanes - 1 - (outputPlanes - 1) * stridePlanes - 1 - padding_ztop;
- const TensorIndex padding_bottom = inputRows + kernelRows - 1 - (outputRows - 1) * strideRows - 1 - padding_top;
- const TensorIndex padding_right = inputCols + kernelCols - 1 - (outputCols - 1) * strideCols - 1 - padding_left;
-
- eigen_assert(padding_ztop >= 0);
- eigen_assert(padding_zbottom >= 0);
- eigen_assert(padding_top >= 0);
- eigen_assert(padding_left >= 0);
- eigen_assert(padding_bottom >= 0);
- eigen_assert(padding_right >= 0);
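
    // Worked example of the inference above (hypothetical numbers): with
    // inputPlanes = inputRows = inputCols = 5, a 3x3x3 kernel, and unit
    // strides, size_* = 5 matches the output extents, so SAME padding is
    // inferred. Then d* = 5 * 1 + 3 - 1 - 5 = 2 and forward_pad_* =
    // 2 - 2 / 2 = 1, which gives padding_ztop/top/left = 3 - 1 - 1 = 1 and
    // padding_zbottom/bottom/right = 5 + 3 - 1 - (5 - 1) * 1 - 1 - 1 = 1.
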
-
- // The kernel has dimensions filters X channels X patch_planes X patch_rows X patch_cols.
- // We need to reverse the kernel along the spatial dimensions.
- array<bool, 5> kernel_reverse;
- if (isColMajor) {
- kernel_reverse[0] = false;
- kernel_reverse[1] = false;
- kernel_reverse[2] = true;
- kernel_reverse[3] = true;
- kernel_reverse[4] = true;
- } else {
- kernel_reverse[0] = true;
- kernel_reverse[1] = true;
- kernel_reverse[2] = true;
- kernel_reverse[3] = false;
- kernel_reverse[4] = false;
- }
-
- DSizes<TensorIndex, 3> kernel_dims;
- if (isColMajor) {
- kernel_dims[0] = kernelFilters;
- kernel_dims[1] = kernelChannels;
- kernel_dims[2] = kernelRows * kernelCols * kernelPlanes;
- } else {
- kernel_dims[0] = kernelRows * kernelCols * kernelPlanes;
- kernel_dims[1] = kernelChannels;
- kernel_dims[2] = kernelFilters;
- }
-
- // The output_backward has dimensions out_depth X out_planes X out_rows X out_cols X OTHERS
- // When we extract the image patches from output_backward, it will have dimensions:
- // out_depth X (patch_planes * patch_rows * patch_cols) X (input_planes * input_rows * input_cols * OTHERS)
- DSizes<TensorIndex, 3> pre_contract_dims;
- if (isColMajor) {
- pre_contract_dims[0] = kernelFilters;
- pre_contract_dims[1] = kernelRows * kernelCols * kernelPlanes;
- pre_contract_dims[2] = inputRows * inputCols * inputPlanes;
- for (int i = 4; i < NumDims; ++i) {
- pre_contract_dims[2] *= out.dimension(i);
- }
- } else {
- pre_contract_dims[2] = kernelFilters;
- pre_contract_dims[1] = kernelRows * kernelCols * kernelPlanes;
- pre_contract_dims[0] = inputRows * inputCols * inputPlanes;
- for (int i = 0; i < NumDims - 4; ++i) {
- pre_contract_dims[0] *= out.dimension(i);
- }
- }
-
- // We will contract along dimensions (0, 2) in kernel and (0, 1) in
- // output_backward, if this is col-major, and
-  // dimensions (0, 2) in kernel and (1, 2) in output_backward, if this is row-major.
- array<IndexPair<TensorIndex>, 2> contract_dims;
- if (isColMajor) {
- // col-major: kernel.contract(output.patches)
- contract_dims[0] = IndexPair<TensorIndex>(0, 0);
- contract_dims[1] = IndexPair<TensorIndex>(2, 1);
- } else {
- // row-major: output.patches.contract(kernel)
- contract_dims[0] = IndexPair<TensorIndex>(1, 0);
- contract_dims[1] = IndexPair<TensorIndex>(2, 2);
- }
-
-  // Post contraction, the dimensions of the input_backprop are
- // channels X input_planes X input_rows X input_cols X OTHERS
- DSizes<TensorIndex, NumDims> post_contract_dims;
- if (isColMajor) {
- post_contract_dims[0] = kernelChannels;
- post_contract_dims[1] = inputPlanes;
- post_contract_dims[2] = inputRows;
- post_contract_dims[3] = inputCols;
- for (int i = 4; i < NumDims; ++i) {
- post_contract_dims[i] = out.dimension(i);
- }
- } else {
- post_contract_dims[NumDims - 1] = kernelChannels;
- post_contract_dims[NumDims - 2] = inputPlanes;
- post_contract_dims[NumDims - 3] = inputRows;
- post_contract_dims[NumDims - 4] = inputCols;
- for (int i = 0; i < NumDims - 4; ++i) {
- post_contract_dims[i] = out.dimension(i);
- }
- }
-
- DSizes<TensorIndex, NumDims> strides;
- for (int i = 0; i < NumDims; i++) {
- strides[i] = 1;
- }
- if (isColMajor) {
- strides[1] = stridePlanes;
- strides[2] = strideRows;
- strides[3] = strideCols;
- } else {
- strides[NumDims - 2] = stridePlanes;
- strides[NumDims - 3] = strideRows;
- strides[NumDims - 4] = strideCols;
- }
-
- return choose(
- Cond<internal::traits<OutputBackward>::Layout == ColMajor>(),
- kernel.reverse(kernel_reverse)
- .reshape(kernel_dims)
- .contract(
- output_backward.extract_volume_patches(kernelPlanes, kernelRows, kernelCols,
- 1, 1, 1, stridePlanes, strideRows, strideCols,
- padding_ztop, padding_zbottom,
- padding_top, padding_bottom,
- padding_left, padding_right)
- .reshape(pre_contract_dims),
- contract_dims)
- .reshape(post_contract_dims),
- output_backward.extract_volume_patches(kernelPlanes, kernelRows, kernelCols,
- 1, 1, 1, stridePlanes, strideRows, strideCols,
- padding_ztop, padding_zbottom,
- padding_top, padding_bottom,
- padding_left, padding_right)
- .reshape(pre_contract_dims)
- .contract(kernel.reverse(kernel_reverse).reshape(kernel_dims),
- contract_dims)
- .reshape(post_contract_dims));
-}
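
A minimal usage sketch of the function above (a sketch only: the wrapper function, names, and sizes below are hypothetical, assuming col-major layout, unit strides, and SAME padding):

    #include <unsupported/Eigen/CXX11/Tensor>
    #include "BackwardCuboidConvolutions.h"  // the header shown in this diff

    inline void cuboid_backward_input_sketch() {
      // filters, channels, kernel_planes, kernel_rows, kernel_cols
      Eigen::Tensor<float, 5> kernel(2, 3, 3, 3, 3);
      // filters, out_planes, out_rows, out_cols; equal to the input extents
      // here, which is what makes SAME padding get inferred.
      Eigen::Tensor<float, 4> output_backward(2, 5, 5, 5);
      kernel.setRandom();
      output_backward.setRandom();
      // Result extents: channels x planes x rows x cols = 3 x 5 x 5 x 5.
      Eigen::Tensor<float, 4> input_backward =
          Eigen::CuboidConvolutionBackwardInput(kernel, output_backward,
                                                /*inputPlanes=*/5,
                                                /*inputRows=*/5,
                                                /*inputCols=*/5);
    }
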
-
-
-/** CuboidConvolutionBackwardKernel
- * \ingroup CXX11_NeuralNetworks_Module
- *
- * \brief Computes the backprop for the filter of a 3D convolution.
- *
- * The output_backward parameter is expected to be a tensor with a rank of 4 or more (filters, depth, height, width, and optionally others)
- * The kernel parameter is expected to be a 5D tensor (filters, channels, kernel_depth, kernel_height, kernel_width)
- * output_backward and kernel have to be in the same layout.
- *
- * The dimensions of the result will be filters, channels, kernel_depth, kernel_height, kernel_width.
- *
- * It is possible to swap the order of the depth, width and height dimensions provided that the same order is used in the input, the kernel, and the output.
- *
- * All dimension orders above are given for col-major, and should be reversed for row-major.
- */
-template <typename OutputBackward, typename Input>
-EIGEN_ALWAYS_INLINE static const typename internal::conditional<
- internal::traits<OutputBackward>::Layout == ColMajor,
- const TensorShufflingOp<
- const array<typename internal::traits<OutputBackward>::Index, 5>,
- const TensorReverseOp<
- const array<bool, 5>,
- const TensorReshapingOp<
- const DSizes<typename internal::traits<OutputBackward>::Index, 5>,
- const TensorContractionOp<
- const array< IndexPair<typename internal::traits<Input>::Index>, 2>,
- const TensorReshapingOp<
- const DSizes<typename internal::traits<Input>::Index, 3>,
- const Input>,
- const TensorReshapingOp<
- const DSizes< typename internal::traits<OutputBackward>::Index, 4>,
- const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const OutputBackward>
- >
- >
- >
- >
- >,
- const TensorShufflingOp<
- const array<typename internal::traits<OutputBackward>::Index, 5>,
- const TensorReverseOp<
- const array<bool, 5>,
- const TensorReshapingOp<
- const DSizes<typename internal::traits<OutputBackward>::Index, 5>,
- const TensorContractionOp<
- const array< IndexPair<typename internal::traits<Input>::Index>, 2>,
- const TensorReshapingOp<
- const DSizes< typename internal::traits<OutputBackward>::Index, 4>,
- const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const OutputBackward>
- >,
- const TensorReshapingOp<
- const DSizes<typename internal::traits<Input>::Index, 3>,
- const Input
- >
- >
- >
- >
- >
->::type
-CuboidConvolutionBackwardKernel(
- const Input& input, const OutputBackward& output_backward,
- typename internal::traits<Input>::Index kernelPlanes,
- typename internal::traits<Input>::Index kernelRows,
- typename internal::traits<Input>::Index kernelCols,
- const DenseIndex stridePlanes = 1,
- const DenseIndex strideRows = 1,
- const DenseIndex strideCols = 1) {
- typedef typename internal::traits<Input>::Index TensorIndex;
- TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input);
- TensorRef<Tensor<typename internal::traits<OutputBackward>::Scalar, internal::traits<OutputBackward>::NumDimensions, internal::traits<OutputBackward>::Layout, TensorIndex> > out(output_backward);
-
- EIGEN_STATIC_ASSERT(internal::traits<Input>::Layout == internal::traits<OutputBackward>::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
-
- static const int NumDims = internal::traits<Input>::NumDimensions;
- EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == internal::traits<OutputBackward>::NumDimensions, YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- const TensorIndex inputPlanes = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2);
- const TensorIndex inputRows = isColMajor ? in.dimension(2) : in.dimension(NumDims - 3);
- const TensorIndex inputCols = isColMajor ? in.dimension(3) : in.dimension(NumDims - 4);
-
- const TensorIndex outputPlanes = isColMajor ? out.dimension(1) : out.dimension(NumDims - 2);
- const TensorIndex outputRows = isColMajor ? out.dimension(2) : out.dimension(NumDims - 3);
- const TensorIndex outputCols = isColMajor ? out.dimension(3) : out.dimension(NumDims - 4);
-
- const TensorIndex kernelFilters = isColMajor ? out.dimension(0) : out.dimension(NumDims - 1);
- const TensorIndex kernelChannels = isColMajor ? in.dimension(0) : in.dimension(NumDims - 1);
-
- TensorIndex forward_pad_z, forward_pad_y, forward_pad_x;
- const TensorIndex size_z = ceil(inputPlanes / static_cast<float>(stridePlanes));
- const TensorIndex size_y = ceil(inputRows / static_cast<float>(strideRows));
- const TensorIndex size_x = ceil(inputCols / static_cast<float>(strideCols));
-
- // Infer padding type.
- if (size_z == outputPlanes && size_y == outputRows && size_x == outputCols) {
- // SAME padding.
- const TensorIndex dz = size_z * stridePlanes + kernelPlanes - 1 - inputPlanes;
- const TensorIndex dy = size_y * strideRows + kernelRows - 1 - inputRows;
- const TensorIndex dx = size_x * strideCols + kernelCols - 1 - inputCols;
-
- forward_pad_z = dz - dz / 2;
- forward_pad_y = dy - dy / 2;
- forward_pad_x = dx - dx / 2;
- } else {
- // VALID padding.
- forward_pad_z = 0;
- forward_pad_y = 0;
- forward_pad_x = 0;
- }
-
- const TensorIndex padding_ztop = kernelPlanes - 1 - forward_pad_z;
- const TensorIndex padding_top = kernelRows - 1 - forward_pad_y;
- const TensorIndex padding_left = kernelCols - 1 - forward_pad_x;
-
- const TensorIndex padding_zbottom = inputPlanes + kernelPlanes - 1 - (outputPlanes - 1) * stridePlanes - 1 - padding_ztop;
- const TensorIndex padding_bottom = inputRows + kernelRows - 1 - (outputRows - 1) * strideRows - 1 - padding_top;
- const TensorIndex padding_right = inputCols + kernelCols - 1 - (outputCols - 1) * strideCols - 1 - padding_left;
-
- eigen_assert(padding_ztop >= 0);
- eigen_assert(padding_zbottom >= 0);
- eigen_assert(padding_top >= 0);
- eigen_assert(padding_left >= 0);
- eigen_assert(padding_bottom >= 0);
- eigen_assert(padding_right >= 0);
-
-  // The output_backward has dimensions out_depth X out_planes X out_rows X out_cols X OTHERS
- // When we extract the image patches from output_backward (with input as the
- // kernel), it will have dimensions
- // (out_depth) X (input_planes * input_rows * input_cols) X (kernel_planes * kernel_rows * kernel_cols) X OTHERS
- DSizes<TensorIndex, 4> pre_contract_dims;
- if (isColMajor) {
- pre_contract_dims[0] = kernelFilters;
- pre_contract_dims[1] = inputRows * inputCols * inputPlanes;
- pre_contract_dims[2] = kernelRows * kernelCols * kernelPlanes;
- pre_contract_dims[3] = 1;
- for (int i = 4; i < NumDims; ++i) {
- pre_contract_dims[3] *= out.dimension(i);
- }
- } else {
- pre_contract_dims[3] = kernelFilters;
- pre_contract_dims[2] = inputRows * inputCols * inputPlanes;
- pre_contract_dims[1] = kernelRows * kernelCols * kernelPlanes;
- pre_contract_dims[0] = 1;
- for (int i = 0; i < NumDims - 4; ++i) {
- pre_contract_dims[0] *= out.dimension(i);
- }
- }
-
- // The input has dimensions in_depth X (input_planes * input_rows * input_cols) X OTHERS
- DSizes<TensorIndex, 3> input_dims;
- if (isColMajor) {
- input_dims[0] = kernelChannels;
- input_dims[1] = inputRows * inputCols * inputPlanes;
- input_dims[2] = 1;
- for (int i = 4; i < NumDims; ++i) {
- input_dims[2] *= in.dimension(i);
- }
- eigen_assert(input_dims[2] == pre_contract_dims[3]);
- } else {
- input_dims[2] = kernelChannels;
- input_dims[1] = inputRows * inputCols * inputPlanes;
- input_dims[0] = 1;
- for (int i = 0; i < NumDims - 4; ++i) {
- input_dims[0] *= in.dimension(i);
- }
- eigen_assert(input_dims[0] == pre_contract_dims[0]);
- }
-
-  // We will contract along dimensions (1, 2) of in and (1, 3) of out, if
-  // this is col-major.
-  // For row-major, it's dimensions (0, 1) of in and (0, 2) of out.
- array<IndexPair<TensorIndex>, 2> contract_dims;
- if (isColMajor) {
- // col-major: in.contract(output.patches)
- contract_dims[0] = IndexPair<TensorIndex>(1, 1);
- contract_dims[1] = IndexPair<TensorIndex>(2, 3);
- } else {
- // row-major: output.patches.contract(in)
- contract_dims[0] = IndexPair<TensorIndex>(0, 0);
- contract_dims[1] = IndexPair<TensorIndex>(2, 1);
- }
-
-  // After the contraction, the kernel will have dimension
-  // in_depth X out_depth X kernel_planes X kernel_rows X kernel_cols.
-  // We will need to shuffle the first two dimensions and reverse the spatial dimensions.
-  // The end shape is:
-  // out_depth X in_depth X kernel_planes X kernel_rows X kernel_cols
-
- // This is the shape of the kernel *before* the shuffling.
- DSizes<TensorIndex, 5> kernel_dims;
- if (isColMajor) {
- kernel_dims[0] = kernelChannels;
- kernel_dims[1] = kernelFilters;
- kernel_dims[2] = kernelPlanes;
- kernel_dims[3] = kernelRows;
- kernel_dims[4] = kernelCols;
- } else {
- kernel_dims[0] = kernelCols;
- kernel_dims[1] = kernelRows;
- kernel_dims[2] = kernelPlanes;
- kernel_dims[3] = kernelFilters;
- kernel_dims[4] = kernelChannels;
- }
-
- // Flip filters and channels.
- array<TensorIndex, 5> kernel_shuffle;
- if (isColMajor) {
- kernel_shuffle[0] = 1;
- kernel_shuffle[1] = 0;
- kernel_shuffle[2] = 2;
- kernel_shuffle[3] = 3;
- kernel_shuffle[4] = 4;
- } else {
- kernel_shuffle[0] = 0;
- kernel_shuffle[1] = 1;
- kernel_shuffle[2] = 2;
- kernel_shuffle[3] = 4;
- kernel_shuffle[4] = 3;
- }
-
- // Reverse the spatial dimensions.
- array<bool, 5> kernel_reverse;
- if (isColMajor) {
- kernel_reverse[0] = false;
- kernel_reverse[1] = false;
- kernel_reverse[2] = true;
- kernel_reverse[3] = true;
- kernel_reverse[4] = true;
- } else {
- kernel_reverse[0] = true;
- kernel_reverse[1] = true;
- kernel_reverse[2] = true;
- kernel_reverse[3] = false;
- kernel_reverse[4] = false;
- }
-
- DSizes<TensorIndex, NumDims> strides;
- for (int i = 0; i < NumDims; i++) {
- strides[i] = 1;
- }
- if (isColMajor) {
- strides[1] = stridePlanes;
- strides[2] = strideRows;
- strides[3] = strideCols;
- } else {
- strides[NumDims - 2] = stridePlanes;
- strides[NumDims - 3] = strideRows;
- strides[NumDims - 4] = strideCols;
- }
- return choose(
- Cond<internal::traits<Input>::Layout == ColMajor>(),
- input.reshape(input_dims)
- .contract(
- output_backward.extract_volume_patches(
- inputPlanes, inputRows, inputCols, 1,
-              1, 1, stridePlanes, strideRows, strideCols,
-              padding_ztop, padding_zbottom, padding_top,
- padding_bottom, padding_left, padding_right)
- .reshape(pre_contract_dims),
- contract_dims)
- .reshape(kernel_dims)
- .reverse(kernel_reverse)
- .shuffle(kernel_shuffle),
- output_backward.extract_volume_patches(
- inputPlanes, inputRows, inputCols, 1, 1, 1,
- stridePlanes, strideRows, strideCols, padding_ztop,
- padding_zbottom, padding_top, padding_bottom,
- padding_left, padding_right)
- .reshape(pre_contract_dims)
- .contract(input.reshape(input_dims), contract_dims)
- .reshape(kernel_dims)
- .reverse(kernel_reverse)
- .shuffle(kernel_shuffle));
-}
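
A matching sketch for the kernel backprop, under the same hypothetical col-major setup and the same includes as the sketch above:

    inline void cuboid_backward_kernel_sketch() {
      Eigen::Tensor<float, 4> input(3, 5, 5, 5);            // channels, planes, rows, cols
      Eigen::Tensor<float, 4> output_backward(2, 5, 5, 5);  // filters, planes, rows, cols
      input.setRandom();
      output_backward.setRandom();
      // Result extents: filters x channels x planes x rows x cols.
      Eigen::Tensor<float, 5> kernel_backward =
          Eigen::CuboidConvolutionBackwardKernel(input, output_backward,
                                                 /*kernelPlanes=*/3,
                                                 /*kernelRows=*/3,
                                                 /*kernelCols=*/3);
    }
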
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_NEURAL_NETWORKS_BACKWARD_CUBOID_CONVOLUTIONS_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardSpatialConvolutions.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardSpatialConvolutions.h
deleted file mode 100644
index 0f4ada246c..0000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/BackwardSpatialConvolutions.h
+++ /dev/null
@@ -1,351 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Ke Yang <yangke@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_NEURAL_NETWORKS_BACKWARD_SPATIAL_CONVOLUTIONS_H
-#define EIGEN_CXX11_NEURAL_NETWORKS_BACKWARD_SPATIAL_CONVOLUTIONS_H
-
-namespace Eigen {
-
-/** SpatialConvolutionBackwardInput
- * \ingroup CXX11_NeuralNetworks_Module
- *
- * \brief Computes the backprop for the input of a 2D convolution.
- *
- * The output_backward parameter is expected to be a tensor with a rank of 3 or more (filters, height, width, and optionally others)
- * The kernel parameter is expected to be a 4D tensor (filters, channels, kernel_height, kernel_width)
- * The output_backward and the kernel have to be in the same layout; both col-major and row-major are supported. The result will be in that same layout.
- *
- * If in_stride > 1, then applies convolution with holes (aka atrous convolution), sampling every in_stride input pixels.
- *
- * The result can be assigned to a tensor of rank equal to the rank of the output_backward. The dimensions of the result will be channels, height, width (and others if applicable).
- *
- * It is possible to swap the order of the width and height dimensions provided that the same order is used in the input, the kernel, and the output.
- *
- */
-
-template <typename OutputBackward, typename Kernel>
-EIGEN_ALWAYS_INLINE
-static const typename internal::conditional<
- internal::traits<OutputBackward>::Layout == ColMajor,
-    TensorReshapingOp<
-        const DSizes<typename internal::traits<OutputBackward>::Index,
-                     internal::traits<OutputBackward>::NumDimensions>,
-        const TensorContractionOp<
-            const array<IndexPair<typename internal::traits<OutputBackward>::Index>, 2>,
-            const TensorReshapingOp<
-                const DSizes<typename internal::traits<OutputBackward>::Index, 3>,
-                const TensorReverseOp<const array<bool, 4>, const Kernel> >,
-            const TensorReshapingOp<
-                const DSizes<typename internal::traits<OutputBackward>::Index, 3>,
-                const TensorImagePatchOp<Dynamic, Dynamic,
-                                         const OutputBackward> > > >,
-    TensorReshapingOp<
-        const DSizes<typename internal::traits<OutputBackward>::Index,
-                     internal::traits<OutputBackward>::NumDimensions>,
-        const TensorContractionOp<
-            const array<IndexPair<typename internal::traits<OutputBackward>::Index>, 2>,
-            const TensorReshapingOp<
-                const DSizes<typename internal::traits<OutputBackward>::Index, 3>,
-                const TensorImagePatchOp<Dynamic, Dynamic, const OutputBackward> >,
-            const TensorReshapingOp<
-                const DSizes<typename internal::traits<OutputBackward>::Index, 3>,
-                const TensorReverseOp<const array<bool, 4>,
-                                      const Kernel> > > > >::type
-SpatialConvolutionBackwardInput(
-    const Kernel& kernel, const OutputBackward& output_backward,
-    typename internal::traits<OutputBackward>::Index inputRows,
-    typename internal::traits<OutputBackward>::Index inputCols,
-    const DenseIndex stride = 1, const DenseIndex in_stride = 1) {
-
- typedef typename internal::traits<OutputBackward>::Index TensorIndex;
- TensorRef<Tensor<typename internal::traits<Kernel>::Scalar, internal::traits<Kernel>::NumDimensions, internal::traits<Kernel>::Layout, TensorIndex> > kern(kernel);
- TensorRef<Tensor<typename internal::traits<OutputBackward>::Scalar, internal::traits<OutputBackward>::NumDimensions, internal::traits<OutputBackward>::Layout, TensorIndex> > out(output_backward);
-
- EIGEN_STATIC_ASSERT(internal::traits<Kernel>::Layout == internal::traits<OutputBackward>::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- static const bool isColMajor = (internal::traits<OutputBackward>::Layout == ColMajor);
-
- static const int NumDims = internal::traits<OutputBackward>::NumDimensions;
-
- // Number of filters to apply. This is the same as the output depth of the result
- const TensorIndex kernelFilters = isColMajor ? kern.dimensions()[0] : kern.dimensions()[3];
- // Number of channels. This is the same as the input depth.
- const TensorIndex kernelChannels = isColMajor ? kern.dimensions()[1] : kern.dimensions()[2];
- const TensorIndex kernelRows = isColMajor ? kern.dimensions()[2] : kern.dimensions()[1];
- const TensorIndex kernelCols = isColMajor ? kern.dimensions()[3] : kern.dimensions()[0];
-
- // This is the effective kernel size, taking into account the (in_stride - 1) zero-values
- // inserted between consecutive kernel elements in atrous convolution
- const TensorIndex kernelRowsEff = kernelRows + (kernelRows - 1) * (in_stride - 1);
- const TensorIndex kernelColsEff = kernelCols + (kernelCols - 1) * (in_stride - 1);
-
- const TensorIndex outputRows = isColMajor ? output_backward.dimension(1) : output_backward.dimension(NumDims - 2);
- const TensorIndex outputCols = isColMajor ? output_backward.dimension(2) : output_backward.dimension(NumDims - 3);
-
- // Computing the forward padding
- const TensorIndex forward_pad_top = ((outputRows - 1) * stride + kernelRowsEff - inputRows) / 2;
- const TensorIndex forward_pad_left = ((outputCols - 1) * stride + kernelColsEff - inputCols) / 2;
-
- const TensorIndex padding_top = kernelRowsEff - 1 - forward_pad_top;
- const TensorIndex padding_left = kernelColsEff - 1 - forward_pad_left;
- const TensorIndex padding_bottom = inputRows + kernelRowsEff - 1 - (outputRows - 1) * stride - 1 - padding_top;
- const TensorIndex padding_right = inputCols + kernelColsEff - 1 - (outputCols - 1) * stride - 1 - padding_left;
-
- eigen_assert(padding_top >= 0);
- eigen_assert(padding_left >= 0);
- eigen_assert(padding_bottom >= 0);
- eigen_assert(padding_right >= 0);
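
    // Worked example of the padding arithmetic above (hypothetical numbers):
    // a 7x7 input, a 3x3 kernel, and stride = in_stride = 1 give
    // forward_pad_top = ((7 - 1) * 1 + 3 - 7) / 2 = 1, hence
    // padding_top = 3 - 1 - 1 = 1 and
    // padding_bottom = 7 + 3 - 1 - (7 - 1) * 1 - 1 - 1 = 1.
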
-
- // The kernel has dimensions filters X channels X patch_rows X patch_cols
- // We need to reverse the kernel along dimensions corresponding to rows and
- // cols.
- // TODO(yangke): we can make things slightly faster by collapsing the dimensions
- // where we don't reverse. Try that once we have a faster compiler.
- array<bool, 4> kernel_reverse;
- if (isColMajor) {
- kernel_reverse[0] = false;
- kernel_reverse[1] = false;
- kernel_reverse[2] = true;
- kernel_reverse[3] = true;
- } else {
- kernel_reverse[0] = true;
- kernel_reverse[1] = true;
- kernel_reverse[2] = false;
- kernel_reverse[3] = false;
- }
-
- DSizes<TensorIndex, 3> kernel_dims;
- if (isColMajor) {
- kernel_dims[0] = kernelFilters;
- kernel_dims[1] = kernelChannels;
- kernel_dims[2] = kernelRows * kernelCols;
- } else {
- kernel_dims[0] = kernelRows * kernelCols;
- kernel_dims[1] = kernelChannels;
- kernel_dims[2] = kernelFilters;
- }
-
- // The output_backward has dimensions out_depth X out_rows X out_cols X OTHERS
- // When we extract the image patches from output_backward, it will have dimensions
- // out_depth X (patch_rows * patch_cols) X (input_rows * input_cols * OTHERS)
- DSizes<TensorIndex, 3> pre_contract_dims;
- if (isColMajor) {
- pre_contract_dims[0] = kernelFilters;
- pre_contract_dims[1] = kernelRows * kernelCols;
- pre_contract_dims[2] = inputRows * inputCols;
- for (int i = 3; i < NumDims; ++i) {
- pre_contract_dims[2] *= out.dimension(i);
- }
- } else {
- pre_contract_dims[2] = kernelFilters;
- pre_contract_dims[1] = kernelRows * kernelCols;
- pre_contract_dims[0] = inputRows * inputCols;
- for (int i = 0; i < NumDims - 3; ++i) {
- pre_contract_dims[0] *= out.dimension(i);
- }
- }
-
- // We will contract along dimensions (0, 2) in kernel and (0, 1) in
- // output_backward, if this is col-major, and
-  // dimensions (0, 2) in kernel and (1, 2) in output_backward, if this is row-major.
- array<IndexPair<TensorIndex>, 2> contract_dims;
- if (isColMajor) {
- // col-major: kernel.contract(output.patches)
- contract_dims[0] = IndexPair<TensorIndex>(0, 0);
- contract_dims[1] = IndexPair<TensorIndex>(2, 1);
- } else {
- // row-major: output.patches.contract(kernel)
- contract_dims[0] = IndexPair<TensorIndex>(1, 0);
- contract_dims[1] = IndexPair<TensorIndex>(2, 2);
- }
-
-  // Post contraction, the dimensions of the input_backprop are
- // channels X input_rows X input_cols X OTHERS
- DSizes<TensorIndex, NumDims> post_contract_dims;
- if (isColMajor) {
- post_contract_dims[0] = kernelChannels;
- post_contract_dims[1] = inputRows;
- post_contract_dims[2] = inputCols;
- for (int i = 3; i < NumDims; ++i) {
- post_contract_dims[i] = out.dimension(i);
- }
- } else {
- post_contract_dims[NumDims - 1] = kernelChannels;
- post_contract_dims[NumDims - 2] = inputRows;
- post_contract_dims[NumDims - 3] = inputCols;
- for (int i = 0; i < NumDims - 3; ++i) {
- post_contract_dims[i] = out.dimension(i);
- }
- }
-
-  return choose(
-      Cond<internal::traits<OutputBackward>::Layout == ColMajor>(),
-      kernel.reverse(kernel_reverse)
-          .reshape(kernel_dims)
-          .contract(output_backward
-                        .extract_image_patches(
-                            kernelRows, kernelCols, 1, 1, in_stride, in_stride,
-                            stride, stride, padding_top, padding_bottom,
-                            padding_left, padding_right, 0)
-                        .reshape(pre_contract_dims),
-                    contract_dims)
-          .reshape(post_contract_dims),
-      output_backward
-          .extract_image_patches(kernelRows, kernelCols, 1, 1, in_stride,
-                                 in_stride, stride, stride, padding_top,
-                                 padding_bottom, padding_left, padding_right, 0)
-          .reshape(pre_contract_dims)
-          .contract(kernel.reverse(kernel_reverse).reshape(kernel_dims),
-                    contract_dims)
-          .reshape(post_contract_dims));
-}
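
A minimal usage sketch, assuming col-major layout, stride 1, in_stride 1, SAME padding, hypothetical sizes, and the same Tensor include as the earlier sketches (plus this header):

    inline void spatial_backward_input_sketch() {
      Eigen::Tensor<float, 4> kernel(2, 3, 3, 3);        // filters, channels, rows, cols
      Eigen::Tensor<float, 3> output_backward(2, 7, 7);  // filters, rows, cols
      kernel.setRandom();
      output_backward.setRandom();
      // Result extents: channels x rows x cols = 3 x 7 x 7.
      Eigen::Tensor<float, 3> input_backward =
          Eigen::SpatialConvolutionBackwardInput(kernel, output_backward,
                                                 /*inputRows=*/7,
                                                 /*inputCols=*/7);
    }
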
-
-
-/** SpatialConvolutionBackwardKernel
- * \ingroup CXX11_NeuralNetworks_Module
- *
- * \brief Computes the backprop for the filter of a 2D convolution.
- *
- * The output_backward parameter is expected to be a tensor with a rank of 3 or more (filters, height, width, and optionally others)
- * The kernel parameter is expected to be a 4D tensor (filters, channels, kernel_height, kernel_width)
- * The output_backward and the input have to be in the same layout; both col-major and row-major are supported. The result will be in that same layout.
- *
- * If in_stride > 1, then applies convolution with holes (aka atrous convolution), sampling every in_stride input pixels.
- *
- * The result can be assigned to a 4D tensor whose dimensions are filters, channels, kernel_height, kernel_width.
- *
- * It is possible to swap the order of the width and height dimensions provided that the same order is used in the input, the kernel, and the output.
- *
- */
-// TODO(gpapan): Resolve a bug in TensorContractionInputMapper at SpatialConvolutions.h that yangke circumvented by using .reshape().reshape().
-// This can significantly accelerate SpatialConvolutionBackwardKernel.
-
-template <typename OutputBackward, typename Input>
-EIGEN_ALWAYS_INLINE
-static const typename internal::conditional<
- internal::traits<OutputBackward>::Layout == ColMajor,
-    const TensorShufflingOp<
-        const array<typename internal::traits<OutputBackward>::Index, 4>,
-        const TensorReverseOp<
-            const array<bool, 4>,
-            const TensorReshapingOp<
-                const DSizes<typename internal::traits<OutputBackward>::Index, 4>,
-                const TensorContractionOp<
-                    const array<IndexPair<typename internal::traits<Input>::Index>, 2>,
-                    const TensorReshapingOp<
-                        const DSizes<typename internal::traits<Input>::Index, 3>,
-                        const Input>,
-                    const TensorReshapingOp<
-                        const DSizes<typename internal::traits<OutputBackward>::Index, 4>,
-                        const TensorReshapingOp<
-                            const DSizes<typename internal::traits<OutputBackward>::Index, 4>,
-                            const TensorImagePatchOp<Dynamic, Dynamic,
-                                                     const OutputBackward> > > > > > >,
-    const TensorShufflingOp<
-        const array<typename internal::traits<OutputBackward>::Index, 4>,
-        const TensorReverseOp<
-            const array<bool, 4>,
-            const TensorReshapingOp<
-                const DSizes<typename internal::traits<OutputBackward>::Index, 4>,
-                const TensorContractionOp<
-                    const array<IndexPair<typename internal::traits<Input>::Index>, 2>,
-                    const TensorReshapingOp<
-                        const DSizes<typename internal::traits<OutputBackward>::Index, 4>,
-                        const TensorReshapingOp<
-                            const DSizes<typename internal::traits<OutputBackward>::Index, 4>,
-                            const TensorImagePatchOp<Dynamic, Dynamic,
-                                                     const OutputBackward> > >,
-                    const TensorReshapingOp<
-                        const DSizes<typename internal::traits<Input>::Index, 3>,
-                        const Input> > > > > >::type
-SpatialConvolutionBackwardKernel(
-    const Input& input, const OutputBackward& output_backward,
-    typename internal::traits<Input>::Index kernelRows,
-    typename internal::traits<Input>::Index kernelCols,
-    const DenseIndex stride = 1, const DenseIndex in_stride = 1) {
-
- typedef typename internal::traits<Input>::Index TensorIndex;
- TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input);
- TensorRef<Tensor<typename internal::traits<OutputBackward>::Scalar, internal::traits<OutputBackward>::NumDimensions, internal::traits<OutputBackward>::Layout, TensorIndex> > out(output_backward);
-
- EIGEN_STATIC_ASSERT(internal::traits<Input>::Layout == internal::traits<OutputBackward>::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- // stride and in_stride cannot both be larger than 1
- eigen_assert(!(stride > 1 && in_stride > 1));
-
- static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
-
- static const int NumDims = internal::traits<Input>::NumDimensions;
- EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == internal::traits<OutputBackward>::NumDimensions, YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- const TensorIndex inputRows = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2);
- const TensorIndex inputCols = isColMajor ? in.dimension(2) : in.dimension(NumDims - 3);
-
- const TensorIndex outputRows = isColMajor ? output_backward.dimension(1) : output_backward.dimension(NumDims - 2);
- const TensorIndex outputCols = isColMajor ? output_backward.dimension(2) : output_backward.dimension(NumDims - 3);
-
- // Number of filters to apply. This is the same as the output depth of the result
- const TensorIndex kernelFilters = isColMajor ? out.dimensions()[0] : out.dimensions()[NumDims - 1];
-
- // Number of channels. This is the same as the input depth.
- const TensorIndex kernelChannels = isColMajor ? in.dimensions()[0] : in.dimensions()[NumDims - 1];
-
- // This is the effective kernel size, taking into account the (in_stride - 1) zero-values
- // inserted between consecutive kernel elements in atrous convolution
- const TensorIndex kernelRowsEff = kernelRows + (kernelRows - 1) * (in_stride - 1);
- const TensorIndex kernelColsEff = kernelCols + (kernelCols - 1) * (in_stride - 1);
-
- // Computing the forward padding
- const TensorIndex forward_pad_top = ((outputRows - 1) * stride + kernelRowsEff - inputRows) / 2;
- const TensorIndex forward_pad_left = ((outputCols - 1) * stride + kernelColsEff - inputCols) / 2;
-
- // TODO: factor out the padding computation.
- const TensorIndex padding_top = kernelRowsEff - 1 - forward_pad_top;
- const TensorIndex padding_left = kernelColsEff - 1 - forward_pad_left;
- const TensorIndex padding_bottom = inputRows + kernelRowsEff - 1 - (outputRows - 1) * stride - 1 - padding_top;
- const TensorIndex padding_right = inputCols + kernelColsEff - 1 - (outputCols - 1) * stride - 1 - padding_left;
-
- eigen_assert(padding_top >= 0);
- eigen_assert(padding_left >= 0);
- eigen_assert(padding_bottom >= 0);
- eigen_assert(padding_right >= 0);
-
- // The output_backward has dimensions out_depth X out_rows X out_cols X OTHERS
- // When we extract the image patches from output_backward (with input as the
- // kernel), it will have dimensions
- // (out_depth) X (input_rows * input_cols) X (kernel_rows * kernel_cols) X OTHERS
- DSizes<TensorIndex, 4> pre_contract_dims;
- if (isColMajor) {
- pre_contract_dims[0] = kernelFilters;
- pre_contract_dims[1] = inputRows * inputCols;
- pre_contract_dims[2] = kernelRows * kernelCols;
- pre_contract_dims[3] = 1;
- for (int i = 3; i < NumDims; ++i) {
- pre_contract_dims[3] *= out.dimension(i);
- }
- } else {
- pre_contract_dims[3] = kernelFilters;
- pre_contract_dims[2] = inputRows * inputCols;
- pre_contract_dims[1] = kernelRows * kernelCols;
- pre_contract_dims[0] = 1;
- for (int i = 0; i < NumDims - 3; ++i) {
- pre_contract_dims[0] *= out.dimension(i);
- }
- }
-
- // The input has dimensions in_depth X (input_rows * input_cols) X OTHERS
- DSizes<TensorIndex, 3> input_dims;
- if (isColMajor) {
- input_dims[0] = kernelChannels;
- input_dims[1] = inputRows * inputCols;
- input_dims[2] = 1;
- for (int i = 3; i < NumDims; ++i) {
- input_dims[2] *= in.dimension(i);
- }
- eigen_assert(input_dims[2] == pre_contract_dims[3]);
- } else {
- input_dims[2] = kernelChannels;
- input_dims[1] = inputRows * inputCols;
- input_dims[0] = 1;
- for (int i = 0; i < NumDims - 3; ++i) {
- input_dims[0] *= in.dimension(i);
- }
- eigen_assert(input_dims[0] == pre_contract_dims[0]);
- }
-
-  // We will contract along dimensions (1, 2) of in and (1, 3) of out, if
-  // this is col-major.
-  // For row-major, it's dimensions (0, 1) of in and (0, 2) of out.
- array<IndexPair<TensorIndex>, 2> contract_dims;
- if (isColMajor) {
- // col-major: in.contract(output.patches)
- contract_dims[0] = IndexPair<TensorIndex>(1, 1);
- contract_dims[1] = IndexPair<TensorIndex>(2, 3);
- } else {
- // row-major: output.patches.contract(in)
- contract_dims[0] = IndexPair<TensorIndex>(0, 0);
- contract_dims[1] = IndexPair<TensorIndex>(2, 1);
- }
-
- // After the contraction, the kernel will have dimension
- // in_depth X out_depth X kernel_rows X kernel_cols
- // We will need to shuffle the first two dimensions and reverse the latter
- // two dimensions.
- // The end shape is
-  // out_depth X in_depth X kernel_rows X kernel_cols
-
- // This is the shape of the kernel *before* the shuffling.
- DSizes<TensorIndex, 4> kernel_dims;
- if (isColMajor) {
- kernel_dims[0] = kernelChannels;
- kernel_dims[1] = kernelFilters;
- kernel_dims[2] = kernelRows;
- kernel_dims[3] = kernelCols;
- } else {
- kernel_dims[0] = kernelCols;
- kernel_dims[1] = kernelRows;
- kernel_dims[2] = kernelFilters;
- kernel_dims[3] = kernelChannels;
- }
-
- array<TensorIndex, 4> kernel_shuffle;
- if (isColMajor) {
- kernel_shuffle[0] = 1;
- kernel_shuffle[1] = 0;
- kernel_shuffle[2] = 2;
- kernel_shuffle[3] = 3;
- } else {
- kernel_shuffle[0] = 0;
- kernel_shuffle[1] = 1;
- kernel_shuffle[2] = 3;
- kernel_shuffle[3] = 2;
- }
-
- array<bool, 4> kernel_reverse;
- if (isColMajor) {
- kernel_reverse[0] = false;
- kernel_reverse[1] = false;
- kernel_reverse[2] = true;
- kernel_reverse[3] = true;
- } else {
- kernel_reverse[0] = true;
- kernel_reverse[1] = true;
- kernel_reverse[2] = false;
- kernel_reverse[3] = false;
- }
-
-  return choose(
-      Cond<internal::traits<Input>::Layout == ColMajor>(),
-      input.reshape(input_dims)
-          .contract(output_backward
-                        .extract_image_patches(
-                            inputRows, inputCols, in_stride, in_stride, 1, 1,
-                            stride, stride, padding_top, padding_bottom,
-                            padding_left, padding_right, 0)
-                        .reshape(pre_contract_dims)
-                        .reshape(pre_contract_dims),
-                    contract_dims)
-          .reshape(kernel_dims)
-          .reverse(kernel_reverse)
-          .shuffle(kernel_shuffle),
-      output_backward
-          .extract_image_patches(inputRows, inputCols, in_stride, in_stride,
-                                 1, 1, stride, stride, padding_top,
-                                 padding_bottom, padding_left, padding_right, 0)
-          .reshape(pre_contract_dims)
-          .reshape(pre_contract_dims)
-          .contract(input.reshape(input_dims), contract_dims)
-          .reshape(kernel_dims)
-          .reverse(kernel_reverse)
-          .shuffle(kernel_shuffle));
-}
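
And the filter backprop under the same hypothetical setup:

    inline void spatial_backward_kernel_sketch() {
      Eigen::Tensor<float, 3> input(3, 7, 7);            // channels, rows, cols
      Eigen::Tensor<float, 3> output_backward(2, 7, 7);  // filters, rows, cols
      input.setRandom();
      output_backward.setRandom();
      // Result extents: filters x channels x rows x cols = 2 x 3 x 3 x 3.
      Eigen::Tensor<float, 4> kernel_backward =
          Eigen::SpatialConvolutionBackwardKernel(input, output_backward,
                                                  /*kernelRows=*/3,
                                                  /*kernelCols=*/3);
    }
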
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_NEURAL_NETWORKS_BACKWARD_SPATIAL_CONVOLUTIONS_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/CuboidConvolution.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/CuboidConvolution.h
deleted file mode 100644
index dfb9dcedba..0000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/CuboidConvolution.h
+++ /dev/null
@@ -1,179 +0,0 @@
-#ifndef EIGEN_CXX11_SRC_NEURAL_NETWORKS_CUBOID_CONVOLUTION_H
-#define EIGEN_CXX11_SRC_NEURAL_NETWORKS_CUBOID_CONVOLUTION_H
-
-#include "Patch3d.h"
-
-namespace Eigen {
-
-/** CuboidConvolution
- * \ingroup CXX11_NeuralNetworks_Module
- *
- * \brief Applies a 3D convolution over a multichannel input voxel block.
- *
- * The input parameter is expected to be a tensor with a rank of 4 or more (channels, depth, height, width, and optionally others).
- * The kernel parameter is expected to be a 5D tensor (filters, channels, kernel_depth, kernel_height, kernel_width).
- * The result can be assigned to a tensor of rank equal to the rank of the input. The dimensions of the result will be filters, depth, height, width (and others if applicable).
- *
- * The input and kernel have to be in the same layout, and both row-major and
- * col-major are supported. The shapes given above are for col-major layout.
- * For row-major, all dimensions should be reversed.
- *
- * It is possible to swap the order of the depth, width, and height dimensions provided that the same order is used in the input, the kernel, and the output.
- */
-template <typename Input, typename Kernel>
-EIGEN_ALWAYS_INLINE
-static const typename internal::conditional <
- internal::traits<Input>::Layout == ColMajor,
- TensorReshapingOp<
- const DSizes<typename internal::traits<Input>::Index,
- internal::traits<Input>::NumDimensions>,
- const TensorContractionOp<
- const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
- const TensorReshapingOp<
- const DSizes<typename internal::traits<Input>::Index, 2>,
- const Kernel>,
- const TensorReshapingOp<
- const DSizes<typename internal::traits<Input>::Index, 2>,
- const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
- const Input> > > >,
- TensorReshapingOp<
- const DSizes<typename internal::traits<Input>::Index,
- internal::traits<Input>::NumDimensions>,
- const TensorContractionOp<
- const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
- const TensorReshapingOp<
- const DSizes<typename internal::traits<Input>::Index, 2>,
- const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic,
- const Input> > ,
- const TensorReshapingOp<
- const DSizes<typename internal::traits<Input>::Index, 2>,
- const Kernel> > > >::type
-CuboidConvolution(const Input& input, const Kernel& kernel,
- const DenseIndex stridePlanes = 1,
- const DenseIndex strideRows = 1,
- const DenseIndex strideCols = 1,
- const PaddingType padding_type = PADDING_SAME) {
- typedef typename internal::traits<Input>::Index TensorIndex;
- TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input);
- TensorRef<Tensor<typename internal::traits<Kernel>::Scalar, internal::traits<Kernel>::NumDimensions, internal::traits<Kernel>::Layout, TensorIndex> > kern(kernel);
-
- EIGEN_STATIC_ASSERT(internal::traits<Input>::Layout == internal::traits<Kernel>::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE);
- static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
- static const int NumDims = internal::traits<Input>::NumDimensions;
-
- // Number of filters to apply. This is the same as the output depth of the result.
- const TensorIndex kernelFilters = isColMajor ? kern.dimensions()[0] : kern.dimensions()[4];
- const TensorIndex kernelChannels = isColMajor ? kern.dimensions()[1] : kern.dimensions()[3];
-
- // Spatial size of the kernel.
-  const TensorIndex kernelDepth = kern.dimensions()[2];  // Dim 2 is the same in both layouts.
- const TensorIndex kernelRows = isColMajor ? kern.dimensions()[3] : kern.dimensions()[1];
- const TensorIndex kernelCols = isColMajor ? kern.dimensions()[4] : kern.dimensions()[0];
-
- if (isColMajor) {
- eigen_assert(kernelChannels == in.dimension(0));
- } else {
- eigen_assert(kernelChannels == in.dimension(NumDims - 1));
- }
-
- const TensorIndex inputPlanes = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2);
- const TensorIndex inputRows = isColMajor ? in.dimension(2) : in.dimension(NumDims - 3);
- const TensorIndex inputCols = isColMajor ? in.dimension(3) : in.dimension(NumDims - 4);
-
- const float stride_planes_f = static_cast<float>(stridePlanes);
- const float stride_rows_f = static_cast<float>(strideRows);
- const float stride_cols_f = static_cast<float>(strideCols);
- TensorIndex out_depth;
- TensorIndex out_height;
- TensorIndex out_width;
- switch (padding_type) {
- case PADDING_VALID:
- out_depth = ceil((inputPlanes - kernelDepth + 1.f) / stride_planes_f);
- out_height = ceil((inputRows - kernelRows + 1.f) / stride_rows_f);
- out_width = ceil((inputCols - kernelCols + 1.f) / stride_cols_f);
- break;
- case PADDING_SAME:
- out_depth = ceil(inputPlanes / stride_planes_f);
- out_height = ceil(inputRows / stride_rows_f);
- out_width = ceil(inputCols / stride_cols_f);
- break;
- default:
- eigen_assert(false && "unexpected padding");
- }
-
- DSizes<TensorIndex, 2> kernel_dims;
- if (isColMajor) {
- kernel_dims[0] = kernelFilters;
- kernel_dims[1] = kernelChannels * kernelDepth * kernelRows * kernelCols;
- } else {
- kernel_dims[0] = kernelChannels * kernelDepth * kernelRows * kernelCols;
- kernel_dims[1] = kernelFilters;
- }
-
-  // Molds the output of the patch extraction into a 2D tensor:
- // - the first dimension (dims[0]): the patch values to be multiplied with the kernels
- // - the second dimension (dims[1]): everything else
- DSizes<TensorIndex, 2> pre_contract_dims;
- if (isColMajor) {
- pre_contract_dims[0] = kernelChannels * kernelDepth * kernelRows * kernelCols;
- pre_contract_dims[1] = out_depth * out_height * out_width;
- for (int i = 4; i < NumDims; ++i) {
- pre_contract_dims[1] *= in.dimension(i);
- }
- } else {
- pre_contract_dims[1] = kernelChannels * kernelDepth * kernelRows * kernelCols;
- pre_contract_dims[0] = out_depth * out_height * out_width;
- for (int i = 0; i < NumDims - 4; ++i) {
- pre_contract_dims[0] *= in.dimension(i);
- }
- }
-
- array<IndexPair<TensorIndex>, 1> contract_dims;
- contract_dims[0] = IndexPair<TensorIndex>(1, 0);
-
- // Molds the output of the contraction into the shape expected by the user
- // (assuming ColMajor):
- // - 1st dim: kernel filters
- // - 2nd dim: output depth
-  // - 3rd dim: output height
-  // - 4th dim: output width
- // - 5th dim and beyond: everything else including batch size
- DSizes<TensorIndex, NumDims> post_contract_dims;
- if (isColMajor) {
- post_contract_dims[0] = kernelFilters;
- post_contract_dims[1] = out_depth;
- post_contract_dims[2] = out_height;
- post_contract_dims[3] = out_width;
- for (int i = 4; i < NumDims; ++i) {
- post_contract_dims[i] = in.dimension(i);
- }
- } else {
- post_contract_dims[NumDims - 1] = kernelFilters;
- post_contract_dims[NumDims - 2] = out_depth;
- post_contract_dims[NumDims - 3] = out_height;
- post_contract_dims[NumDims - 4] = out_width;
- for (int i = 0; i < NumDims - 4; ++i) {
- post_contract_dims[i] = in.dimension(i);
- }
- }
-
- return choose(
- Cond<internal::traits<Input>::Layout == ColMajor>(),
- kernel.reshape(kernel_dims)
- .contract(input.extract_volume_patches(
- kernelDepth, kernelRows, kernelCols, stridePlanes,
- strideRows, strideCols, padding_type)
- .reshape(pre_contract_dims),
- contract_dims)
- .reshape(post_contract_dims),
- input.extract_volume_patches(kernelDepth, kernelRows, kernelCols,
- stridePlanes, strideRows, strideCols,
- padding_type)
- .reshape(pre_contract_dims)
- .contract(kernel.reshape(kernel_dims), contract_dims)
- .reshape(post_contract_dims));
-}
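
A minimal forward-pass sketch, assuming col-major layout, hypothetical sizes, and the same includes as the earlier sketches (plus this header); with the default PADDING_SAME and unit strides the spatial extents are preserved:

    inline void cuboid_convolution_sketch() {
      Eigen::Tensor<float, 4> input(3, 5, 5, 5);      // channels, planes, rows, cols
      Eigen::Tensor<float, 5> kernel(2, 3, 3, 3, 3);  // filters, channels, planes, rows, cols
      input.setRandom();
      kernel.setRandom();
      // Result extents: filters x planes x rows x cols = 2 x 5 x 5 x 5.
      Eigen::Tensor<float, 4> output = Eigen::CuboidConvolution(input, kernel);
    }
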
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_SRC_NEURAL_NETWORKS_CUBOID_CONVOLUTION_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Patch3d.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Patch3d.h
deleted file mode 100644
index 2864f83299..0000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Patch3d.h
+++ /dev/null
@@ -1,240 +0,0 @@
-#ifndef EIGEN_CXX11_SRC_NEURAL_NETWORKS_PATCH3D_H
-#define EIGEN_CXX11_SRC_NEURAL_NETWORKS_PATCH3D_H
-
-#if not defined(__CUDACC__)
-#include <type_traits>
-#endif
-
-namespace Eigen {
-namespace internal {
-
-/** Extract3DPatches
- * \ingroup CXX11_NeuralNetworks_Module
- *
- * \brief Extracts 3D patches from a multichannel input volume.
- *
- * The input parameter is expected to be a tensor with a rank of 4 or more
- * (channels, depth, height, width, optional others in col-major, and the
- * reverse order in row-major).
- *
- * The return value will be a tensor of 3 more dimensions than the input tensor.
- * In col-major, the first 4 dimensions of the result are: channels, patch_depth,
- * patch_height, patch_width. The next dimensions will identify the patch
- * position on the 3D grid of extracted patches: z, y, x. The remaining
- * dimensions, if any, will be the same as the 'other' dimensions of the input
- * tensor.
- */
-
-template <typename Input>
-EIGEN_ALWAYS_INLINE static const TensorStridingOp<
- const array<typename internal::traits<Input>::Index,
- internal::traits<Input>::NumDimensions + 3>,
- const TensorReshapingOp<
- const DSizes<typename internal::traits<Input>::Index,
- internal::traits<Input>::NumDimensions + 3>,
- const TensorPatchOp<
- const DSizes<typename internal::traits<Input>::Index,
- internal::traits<Input>::NumDimensions>,
- const TensorPaddingOp<
- const array<IndexPair<typename internal::traits<Input>::Index>,
- internal::traits<Input>::NumDimensions>,
- const Input> > > >
-Extract3DPatches(
- const Input& input, const DenseIndex patchPlanes,
- const DenseIndex patchRows, const DenseIndex patchCols,
- const DenseIndex stridePlanes, const DenseIndex strideRows,
- const DenseIndex strideCols,
- const DenseIndex paddingZTop, const DenseIndex paddingZBottom,
- const DenseIndex paddingTop, const DenseIndex paddingBottom,
- const DenseIndex paddingLeft, const DenseIndex paddingRight,
- const typename internal::traits<Input>::Scalar padding_value = 0) {
-
- typedef typename internal::traits<Input>::Index TensorIndex;
- TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input);
-
- EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions >= 4, YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
- static const int NumDims = internal::traits<Input>::NumDimensions;
- static const int ExtDims = NumDims + 3;
-
- // Tensor size after patch extraction. We add three dimensions to unpack the
- // linear patch index into a 3D grid over which stride() can work.
- DSizes<TensorIndex, ExtDims> pre_stride_dims;
-
- if (isColMajor) {
- pre_stride_dims[0] = in.dimension(0);
- pre_stride_dims[1] = patchPlanes;
- pre_stride_dims[2] = patchRows;
- pre_stride_dims[3] = patchCols;
- } else {
- pre_stride_dims[ExtDims - 1] = in.dimension(NumDims - 1);
- pre_stride_dims[ExtDims - 4] = patchCols;
- pre_stride_dims[ExtDims - 3] = patchRows;
- pre_stride_dims[ExtDims - 2] = patchPlanes;
- }
-
- const TensorIndex inputPlanes = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2);
- const TensorIndex inputRows = isColMajor ? in.dimension(2) : in.dimension(NumDims - 3);
- const TensorIndex inputCols = isColMajor ? in.dimension(3) : in.dimension(NumDims - 4);
-
- array<IndexPair<TensorIndex>, NumDims> paddings;
- for (int i = 0; i < NumDims; ++i) {
- paddings[i] = IndexPair<TensorIndex>(0, 0);
- }
-
- paddings[isColMajor ? 1 : (NumDims - 2)] = IndexPair<TensorIndex>(paddingZTop, paddingZBottom);
- paddings[isColMajor ? 2 : (NumDims - 3)] = IndexPair<TensorIndex>(paddingTop, paddingBottom);
- paddings[isColMajor ? 3 : (NumDims - 4)] = IndexPair<TensorIndex>(paddingLeft, paddingRight);
-
- pre_stride_dims[isColMajor ? 4 : (ExtDims - 5)] = inputPlanes + paddingZBottom + paddingZTop - patchPlanes + 1;
- pre_stride_dims[isColMajor ? 5 : (ExtDims - 6)] = inputRows + paddingTop + paddingBottom - patchRows + 1;
- pre_stride_dims[isColMajor ? 6 : (ExtDims - 7)] = inputCols + paddingLeft + paddingRight - patchCols + 1;
-
- if (isColMajor) {
- for (int i = 7; i < NumDims + 3; ++i) {
- pre_stride_dims[i] = in.dimension(i - 3);
- }
- } else {
- for (int i = 0; i < NumDims - 4; ++i) {
- pre_stride_dims[i] = in.dimension(i);
- }
- }
-
- DSizes<TensorIndex, NumDims> patch_dims;
- if (isColMajor) {
- patch_dims[0] = in.dimension(0);
- patch_dims[1] = patchPlanes;
- patch_dims[2] = patchRows;
- patch_dims[3] = patchCols;
- for (int i = 4; i < NumDims; ++i) {
- patch_dims[i] = 1;
- }
- } else {
- patch_dims[NumDims - 1] = in.dimension(NumDims - 1);
- patch_dims[NumDims - 4] = patchCols;
- patch_dims[NumDims - 3] = patchRows;
- patch_dims[NumDims - 2] = patchPlanes;
- for (int i = 0; i < NumDims - 4; i++) {
- patch_dims[i] = 1;
- }
- }
-
- array<TensorIndex, NumDims + 3> strides;
- if (isColMajor) {
- // No striding within the patches.
- for (int i = 0; i < 4; ++i) {
- strides[i] = 1;
- }
- // Apply striding in the spatial patch grid dimensions only.
- strides[4] = stridePlanes;
- strides[5] = strideRows;
- strides[6] = strideCols;
- // No striding in the remaining dimensions (batches, ...).
- for (int i = 7; i < NumDims + 3; i++) {
- strides[i] = 1;
- }
- } else {
- // No striding within the patches.
- for (int i = 1; i <= 4; ++i) {
- strides[ExtDims - i] = 1;
- }
- // Apply striding in the spatial patch grid dimensions only.
- strides[ExtDims - 7] = strideCols;
- strides[ExtDims - 6] = strideRows;
- strides[ExtDims - 5] = stridePlanes;
- // No striding in the remaining dimensions (batches, ...).
- for (int i = 0; i < NumDims - 4; i++) {
- strides[i] = 1;
- }
- }
-
- // TODO(mjanusz): Consider getting rid of pad(), and stride() and extend
- // extract_patches to take additional parameters for padding/striding,
- // similarly to extract_image_patches.
-  return input.pad(paddings, padding_value)
-      .extract_patches(patch_dims)
-      .reshape(pre_stride_dims)
-      .stride(strides);
-}
-
-
-template <typename Input>
-EIGEN_ALWAYS_INLINE static const TensorStridingOp<
- const array<typename internal::traits<Input>::Index,
- internal::traits<Input>::NumDimensions + 3>,
- const TensorReshapingOp<
- const DSizes<typename internal::traits<Input>::Index,
- internal::traits<Input>::NumDimensions + 3>,
- const TensorPatchOp<
- const DSizes<typename internal::traits<Input>::Index,
- internal::traits<Input>::NumDimensions>,
- const TensorPaddingOp<
- const array<IndexPair<typename internal::traits<Input>::Index>,
- internal::traits<Input>::NumDimensions>,
- const Input> > > >
-Extract3DPatches(
- const Input& input, const DenseIndex patchPlanes,
- const DenseIndex patchRows, const DenseIndex patchCols,
- const DenseIndex stridePlanes, const DenseIndex strideRows,
- const DenseIndex strideCols, const PaddingType padding_type,
- const typename internal::traits<Input>::Scalar padding_value = 0) {
- typedef typename internal::traits<Input>::Index TensorIndex;
- TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input);
-
- EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions >= 4, YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
- static const int NumDims = internal::traits<Input>::NumDimensions;
-
- const TensorIndex inputPlanes = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2);
- const TensorIndex inputRows = isColMajor ? in.dimension(2) : in.dimension(NumDims - 3);
- const TensorIndex inputCols = isColMajor ? in.dimension(3) : in.dimension(NumDims - 4);
-
- switch (padding_type) {
- case PADDING_VALID:
- // No padding in any dimension.
- return Extract3DPatches(input, patchPlanes, patchRows, patchCols,
- stridePlanes, strideRows, strideCols,
- 0, 0, 0, 0, 0, 0, padding_value);
- case PADDING_SAME: {
-      // The size of the tensor before striding should be just the expected
-      // output times the stride.
- const TensorIndex size_z = ceil(inputPlanes / static_cast<float>(stridePlanes)) * stridePlanes;
- const TensorIndex size_y = ceil(inputRows / static_cast<float>(strideRows)) * strideRows;
- const TensorIndex size_x = ceil(inputCols / static_cast<float>(strideCols)) * strideCols;
-
- // The size of the patch space is going to be: padded_input_size - patch_size + 1.
- // This has to match the expected size before striding (pre_stride_dims).
- // The deltas below extend the input to the expected size.
- const TensorIndex dz = size_z + patchPlanes - 1 - inputPlanes;
- const TensorIndex dy = size_y + patchRows - 1 - inputRows;
- const TensorIndex dx = size_x + patchCols - 1 - inputCols;
-
- return Extract3DPatches(input, patchPlanes, patchRows, patchCols,
- stridePlanes, strideRows, strideCols,
- dz - dz / 2, dz / 2,
- dy - dy / 2, dy / 2,
- dx - dx / 2, dx / 2,
- padding_value);
- }
- default:
- eigen_assert(false && "unexpected padding");
- // unreachable code to avoid missing return warning.
- return Extract3DPatches(input, patchPlanes, patchRows, patchCols,
- stridePlanes, strideRows, strideCols,
- 0, 0, 0, 0, 0, 0, padding_value);
- }
-}
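
A minimal sketch of the padding-typed overload above, assuming col-major layout, hypothetical sizes, and the same includes as the earlier sketches (plus this header):

    inline void extract_3d_patches_sketch() {
      Eigen::Tensor<float, 4> volume(3, 5, 5, 5);  // channels, planes, rows, cols
      volume.setRandom();
      // 2x2x2 patches, unit strides, VALID padding: the result has rank
      // 4 + 3 = 7, with extents channels (3), patch dims (2, 2, 2), and the
      // 4x4x4 grid of patch positions.
      Eigen::Tensor<float, 7> patches = Eigen::internal::Extract3DPatches(
          volume, 2, 2, 2, 1, 1, 1, Eigen::PADDING_VALID);
    }
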
-
-// TODO(mjanusz): Switch this to a 'using' alias once CUDA supports C++11.
-template <typename Input>
-struct Extract3DPatchesType {
- typedef const TensorStridingOp< const array<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions + 3>,
- const TensorReshapingOp< const DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions + 3>,
- const TensorPatchOp< const DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions>,
- const TensorPaddingOp< const array< IndexPair<typename internal::traits<Input>::Index>, internal::traits<Input>::NumDimensions>,
- const Input> > > > type;
-};
-
-} // end namespace internal
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_SRC_NEURAL_NETWORKS_PATCH3D_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Pooling.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Pooling.h
deleted file mode 100644
index 942b060ba7..0000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/Pooling.h
+++ /dev/null
@@ -1,433 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef EIGEN_CXX11_NEURAL_NETWORKS_POOLING_H
-#define EIGEN_CXX11_NEURAL_NETWORKS_POOLING_H
-
-#include "Patch3d.h"
-
-namespace Eigen {
-
-/** SpatialMaxPooling
- * \ingroup CXX11_NeuralNetworks_Module
- *
- * \brief Applies a max-pooling over a multichannel input image.
- *
- * The input parameter is expected to be a tensor with a rank of 4 (channels, height, width, others in col-major, and the reverse of that in row-major).
- *
- * The result can be assigned to a tensor of rank equal to the rank of the input. The dimensions of the result will be channels, height, width, and others (in col-major, and the reverse of that if the input was row-major).
- *
- * The order of the width and height dimensions can be swapped if needed.
- *
-*/
-#if !defined(EIGEN_HAS_INDEX_LIST)
-template <typename Input>
-EIGEN_ALWAYS_INLINE
-static const TensorReshapingOp<const Eigen::DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions>, const TensorReductionOp<internal::MaxReducer<typename internal::remove_const<typename internal::traits<Input>::Scalar>::type>, const Eigen::array<int, 2>, const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
-#else
-template <typename Input>
-EIGEN_ALWAYS_INLINE
-static const TensorReshapingOp<const Eigen::DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions>, const TensorReductionOp<internal::MaxReducer<typename internal::remove_const<typename internal::traits<Input>::Scalar>::type>, typename internal::conditional<internal::traits<Input>::Layout == ColMajor, const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >, const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3> > >::type, const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
-#endif
-SpatialMaxPooling(const Input& input, DenseIndex patchRows, DenseIndex patchCols,
- DenseIndex strideRows, DenseIndex strideCols, const PaddingType padding_type,
- DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1)
-{
- EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4, YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- typedef typename internal::traits<Input>::Index TensorIndex;
- TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input);
-
- const DenseIndex patchRowsEff = patchRows + (patchRows - 1) * (in_strideRows - 1);
- const DenseIndex patchColsEff = patchCols + (patchCols - 1) * (in_strideCols - 1);
-
- static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
- static const int idxRows = isColMajor ? 1 : 2;
- static const int idxCols = isColMajor ? 2 : 1;
-
- // Molds the output of the reduction into the shape expected by the user.
- // (assuming col-major):
- // - 1st dim: channels
- // - 2nd dim: output height
- // - 3rd dim: output width
- // - 4th dim and beyond: everything else including batch size
- Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions> post_reduce_dims;
- post_reduce_dims[0] = in.dimension(0);
- if (padding_type == PADDING_VALID) {
- post_reduce_dims[idxRows] = numext::ceil((in.dimension(idxRows) - patchRowsEff + 1.f) / static_cast<float>(strideRows));
- post_reduce_dims[idxCols] = numext::ceil((in.dimension(idxCols) - patchColsEff + 1.f) / static_cast<float>(strideCols));
- } else {
- post_reduce_dims[idxRows] = numext::ceil(in.dimension(idxRows) / static_cast<float>(strideRows));
- post_reduce_dims[idxCols] = numext::ceil(in.dimension(idxCols) / static_cast<float>(strideCols));
- }
- post_reduce_dims[3] = in.dimension(3);
-
-#if !defined(EIGEN_HAS_INDEX_LIST)
- // nvcc doesn't support cxx11
- Eigen::array<int, 2> reduction_dims;
- if (isColMajor) {
- reduction_dims[0] = 1;
- reduction_dims[1] = 2;
- } else {
- reduction_dims[0] = 2;
- reduction_dims[1] = 3;
- }
-#else
- // Take advantage of cxx11 to give the compiler information it can use to
- // optimize the code.
- typename internal::conditional<internal::traits<Input>::Layout == ColMajor, const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >, const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3> > >::type reduction_dims;
-#endif
-
- return input.extract_image_patches(patchRows, patchCols, strideRows, strideCols, in_strideRows, in_strideCols, padding_type, -Eigen::NumTraits<typename internal::remove_const<typename internal::traits<Input>::Scalar>::type>::highest()).maximum(reduction_dims).reshape(post_reduce_dims);
-}
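
A minimal usage sketch for the function above (col-major layout; the sizes are hypothetical and the now-removed NeuralNetworks module header is assumed to be included):

    // 4D input: (channels, height, width, batch) in col-major.
    Eigen::Tensor<float, 4> input(3, 28, 28, 16);
    input.setRandom();
    // 2x2 windows, stride 2, VALID padding -> (3, 14, 14, 16).
    Eigen::Tensor<float, 4> pooled =
        Eigen::SpatialMaxPooling(input, 2, 2, 2, 2, Eigen::PADDING_VALID);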
-
-/** CuboidMaxPooling
- * \ingroup CXX11_NeuralNetworks_Module
- *
- * \brief Applies a max-pooling over a multichannel input volume.
- *
- * The input parameter is expected to be a tensor with a rank of 5 (channels, depth, height, width, others in col-major, and the reverse of that in row-major).
- *
- * The result can be assigned to a tensor of rank equal to the rank of the input. The dimensions of the result will be channels, depth, height, width, and others (in col-major, and the reverse of that if the input was row-major).
- *
- * The order of the depth, width and height dimensions can be swapped if needed.
- *
-*/
-#if !defined(EIGEN_HAS_INDEX_LIST)
-template <typename Input>
-EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
- const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
- const TensorReductionOp<
- internal::MaxReducer<float>, const Eigen::array<int, 1>,
- const TensorReshapingOp<
- const Eigen::DSizes<DenseIndex, 3>,
- const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Input> > > >
-#else
-template <typename Input>
-EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
- const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
- const TensorReductionOp<
- internal::MaxReducer<float>,
- const Eigen::IndexList<Eigen::type2index<1> >,
- const TensorReshapingOp<
- const Eigen::DSizes<DenseIndex, 3>,
- const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Input> > > >
-#endif
-CuboidMaxPooling(const Input& input, DenseIndex patchPlanes,
- DenseIndex patchRows, DenseIndex patchCols,
- DenseIndex stridePlanes, DenseIndex strideRows,
- DenseIndex strideCols, const PaddingType padding_type) {
- EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5, YOU_MADE_A_PROGRAMMING_MISTAKE);
- static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
-
- typedef typename internal::traits<Input>::Index TensorIndex;
- TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input);
-
- static const int idxPlanes = isColMajor ? 1 : 3;
- static const int idxRows = 2;
- static const int idxCols = isColMajor ? 3 : 1;
-
-  // Molds the output of the reduction into the shape expected by the user
- // (assuming col-major):
- // - 1st dim: channels
- // - 2nd dim: output depth
- // - 3rd dim: output height
- // - 4th dim: output width
- // - 5th dim and beyond: everything else including batch size
- Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions> post_reduce_dims;
- post_reduce_dims[0] = in.dimension(0);
- if (padding_type == PADDING_VALID) {
- post_reduce_dims[idxPlanes] = numext::ceil((in.dimension(idxPlanes) - patchPlanes + 1.f) / static_cast<float>(stridePlanes));
- post_reduce_dims[idxRows] = numext::ceil((in.dimension(idxRows) - patchRows + 1.f) / static_cast<float>(strideRows));
- post_reduce_dims[idxCols] = numext::ceil((in.dimension(idxCols) - patchCols + 1.f) / static_cast<float>(strideCols));
- } else {
- post_reduce_dims[idxPlanes] = numext::ceil(in.dimension(idxPlanes) / static_cast<float>(stridePlanes));
- post_reduce_dims[idxRows] = numext::ceil(in.dimension(idxRows) / static_cast<float>(strideRows));
- post_reduce_dims[idxCols] = numext::ceil(in.dimension(idxCols) / static_cast<float>(strideCols));
- }
- post_reduce_dims[4] = in.dimension(4);
-
- Eigen::DSizes<DenseIndex, 3> pre_reduce_dims;
- pre_reduce_dims[1] = patchRows * patchCols * patchPlanes;
- if (isColMajor) {
- pre_reduce_dims[0] = post_reduce_dims[0];
- pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] * post_reduce_dims[3] * post_reduce_dims[4];
- } else {
- pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] * post_reduce_dims[2] * post_reduce_dims[3];
- pre_reduce_dims[2] = post_reduce_dims[4];
- }
-
-#if !defined(EIGEN_HAS_INDEX_LIST)
- // nvcc doesn't support cxx11
- Eigen::array<int, 1> reduction_dims;
- reduction_dims[0] = 1;
-#else
- // Take advantage of cxx11 to give the compiler information it can use to
- // optimize the code.
- Eigen::IndexList<Eigen::type2index<1> > reduction_dims;
-#endif
- return input.extract_volume_patches(patchPlanes, patchRows, patchCols,
- stridePlanes, strideRows, strideCols,
- padding_type, -Eigen::NumTraits<float>::highest())
- .reshape(pre_reduce_dims)
- .maximum(reduction_dims)
- .reshape(post_reduce_dims);
-}
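
The 5D counterpart, again with hypothetical sizes:

    // 5D input: (channels, depth, height, width, batch) in col-major.
    Eigen::Tensor<float, 5> vol(4, 8, 16, 16, 2);
    vol.setRandom();
    // 2x2x2 windows, stride 2, SAME padding -> (4, 4, 8, 8, 2).
    Eigen::Tensor<float, 5> pooled =
        Eigen::CuboidMaxPooling(vol, 2, 2, 2, 2, 2, 2, Eigen::PADDING_SAME);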
-
-
-/** SpatialAvgPooling
- * \ingroup CXX11_NeuralNetworks_Module
- *
- * \brief Applies an average pooling over a multichannel input image.
- *
- * The input parameter is expected to be a tensor with a rank of 4 (channels, height, width, others in col-major, and the reverse of that in row-major).
- *
- * The result can be assigned to a tensor of rank equal to the rank of the input. The dimensions of the result will be channels, height, width, and others (in col-major, and the reverse of that if the input was row-major).
- *
- * The order of the width and height dimensions can be swapped if needed.
- *
-*/
-namespace internal {
-
-template <typename T> struct AvgPoolMeanReducer
-{
-#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)
- // We only support packet access for floats.
- static const bool PacketAccess = internal::is_same<T, float>::value;
-#else
- static const bool PacketAccess = false;
-#endif
- static const bool IsStateful = true;
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE AvgPoolMeanReducer() : scalarCount_(0) {
- typedef typename packet_traits<T>::type Packet;
- packetCount_ = pset1<Packet>(0.0);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reduce(const T t, T* accum) {
- if (t != -Eigen::NumTraits<T>::highest()) {
- (*accum) = (*accum) + t;
- scalarCount_++;
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T initialize() const {
- return static_cast<T>(0);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalize(const T accum) const {
- eigen_assert(scalarCount_ > 0);
- return accum / scalarCount_;
- }
-
-#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)
-#ifdef EIGEN_VECTORIZE_AVX
-#define pequal(a,b) _mm256_cmp_ps(a,b,_CMP_EQ_UQ)
-#define psel(a,b,false_mask) _mm256_blendv_ps(a,b,false_mask)
-#else
-#define pequal(a,b) _mm_cmpeq_ps(a,b)
-#define psel(a,b,false_mask) _mm_or_ps(_mm_andnot_ps(false_mask, a), _mm_and_ps(false_mask, b))
-#endif
-
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void reducePacket(const Packet& p, Packet* accum) {
- reducePacketWithType(static_cast<T>(0), p, accum);
- }
-
- template <typename Packet>
- void reducePacketWithType(T, const Packet& p, Packet* accum) {
- Packet skip_mask = pequal(p, pset1<Packet>(-Eigen::NumTraits<T>::highest()));
- (*accum) = padd<Packet>(*accum, psel(p, pset1<Packet>(0), skip_mask));
- packetCount_ = padd<Packet>(packetCount_, psel(pset1<Packet>(1), pset1<Packet>(0), skip_mask));
- }
-
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet initializePacket() const {
- return pset1<Packet>(0);
- }
-
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet finalizePacket(const Packet& vaccum) const {
- return pdiv(vaccum, packetCount_);
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T finalizeBoth(const T saccum, const Packet& vaccum) const {
- return (saccum + predux(vaccum)) / (scalarCount_ + predux(packetCount_));
- }
-#endif
-
- protected:
- typedef typename packet_traits<T>::type Packet;
- int scalarCount_;
- Packet packetCount_;
-};
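
The reducer averages only non-padding entries: padded coefficients arrive as -NumTraits<T>::highest(), the sentinel the pooling functions below pass to the patch extraction, and are skipped in both the sum and the count. A scalar illustration of the interface defined above:

    Eigen::internal::AvgPoolMeanReducer<float> r;
    float acc = r.initialize();                           // 0
    r.reduce(2.0f, &acc);                                 // counted
    r.reduce(-Eigen::NumTraits<float>::highest(), &acc);  // padding sentinel, skipped
    r.reduce(4.0f, &acc);                                 // counted
    const float mean = r.finalize(acc);                   // (2 + 4) / 2 == 3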
-
-} // namespace internal
-
-#if !defined(EIGEN_HAS_INDEX_LIST)
-template <typename Input>
-EIGEN_ALWAYS_INLINE
-static const TensorReshapingOp<const Eigen::DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions>, const TensorReductionOp<internal::AvgPoolMeanReducer<typename internal::remove_const<typename internal::traits<Input>::Scalar>::type>, const Eigen::array<int, 2>, const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
-#else
-template <typename Input>
-EIGEN_ALWAYS_INLINE
-static const TensorReshapingOp<const Eigen::DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions>, const TensorReductionOp<internal::AvgPoolMeanReducer<typename internal::remove_const<typename internal::traits<Input>::Scalar>::type>, typename internal::conditional<internal::traits<Input>::Layout == ColMajor, const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >, const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3> > >::type, const TensorImagePatchOp<Dynamic, Dynamic, const Input> > >
-#endif
-SpatialAvgPooling(const Input& input, DenseIndex patchRows, DenseIndex patchCols,
- DenseIndex strideRows, DenseIndex strideCols, const PaddingType padding_type,
- DenseIndex in_strideRows = 1, DenseIndex in_strideCols = 1)
-{
- EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 4, YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- typedef typename internal::traits<Input>::Index TensorIndex;
- TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input);
-
- const DenseIndex patchRowsEff = patchRows + (patchRows - 1) * (in_strideRows - 1);
- const DenseIndex patchColsEff = patchCols + (patchCols - 1) * (in_strideCols - 1);
-
- static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
- static const int idxRows = isColMajor ? 1 : 2;
- static const int idxCols = isColMajor ? 2 : 1;
-
- // Molds the output of the reduction into the shape expected by the user.
- // (assuming col-major):
- // - 1st dim: channels
- // - 2nd dim: output height
- // - 3rd dim: output width
- // - 4th dim and beyond: everything else including batch size
- Eigen::DSizes<TensorIndex, internal::traits<Input>::NumDimensions> post_reduce_dims;
- post_reduce_dims[0] = in.dimension(0);
- if (padding_type == PADDING_VALID) {
- post_reduce_dims[idxRows] = numext::ceil((in.dimension(idxRows) - patchRowsEff + 1.f) / static_cast<float>(strideRows));
- post_reduce_dims[idxCols] = numext::ceil((in.dimension(idxCols) - patchColsEff + 1.f) / static_cast<float>(strideCols));
- } else {
- post_reduce_dims[idxRows] = numext::ceil(in.dimension(idxRows) / static_cast<float>(strideRows));
- post_reduce_dims[idxCols] = numext::ceil(in.dimension(idxCols) / static_cast<float>(strideCols));
- }
- post_reduce_dims[3] = in.dimension(3);
-
- typedef typename internal::remove_const<typename internal::traits<Input>::Scalar>::type CoeffReturnType;
- internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan;
-
-#if !defined(EIGEN_HAS_INDEX_LIST)
- // nvcc doesn't support cxx11
- Eigen::array<int, 2> reduction_dims;
- if (isColMajor) {
- reduction_dims[0] = 1;
- reduction_dims[1] = 2;
- } else {
- reduction_dims[0] = 2;
- reduction_dims[1] = 3;
- }
-#else
- // Take advantage of cxx11 to give the compiler information it can use to
- // optimize the code.
- typename internal::conditional<internal::traits<Input>::Layout == ColMajor, const Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2> >, const Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3> > >::type reduction_dims;
-#endif
- return input.extract_image_patches(patchRows, patchCols, strideRows, strideCols, in_strideRows, in_strideCols, padding_type, -Eigen::NumTraits<typename internal::remove_const<typename internal::traits<Input>::Scalar>::type>::highest()).reduce(reduction_dims, mean_with_nan).reshape(post_reduce_dims);
-}
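
Usage mirrors SpatialMaxPooling; a sketch with hypothetical sizes:

    // 3x3 windows, stride 1, SAME padding preserves the spatial extent:
    // (3, 28, 28, 16) -> (3, 28, 28, 16).
    Eigen::Tensor<float, 4> input(3, 28, 28, 16);
    input.setRandom();
    Eigen::Tensor<float, 4> avg =
        Eigen::SpatialAvgPooling(input, 3, 3, 1, 1, Eigen::PADDING_SAME);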
-
-
-/** CuboidAvgPooling
- * \ingroup CXX11_NeuralNetworks_Module
- *
- * \brief Applies an average pooling over a multichannel input volume.
- *
- * The input parameter is expected to be a tensor with a rank of 5 (channels, depth, height, width, others in col-major, and the reverse of that in row-major).
- *
- * The result can be assigned to a tensor of rank equal to the rank of the input. The dimensions of the result will be channels, depth, height, width, and others (in col-major, and the reverse of that if the input was row-major).
- *
- * The order of the depth, width and height dimensions can be swapped if needed.
- *
-*/
-#if !defined(EIGEN_HAS_INDEX_LIST)
-template <typename Input>
-EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
- const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
- const TensorReductionOp<
- internal::AvgPoolMeanReducer<float>, const Eigen::array<int, 1>,
- const TensorReshapingOp<
- const Eigen::DSizes<DenseIndex, 3>,
- const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Input> > > >
-#else
-template <typename Input>
-EIGEN_ALWAYS_INLINE static const TensorReshapingOp<
- const Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions>,
- const TensorReductionOp<
- internal::AvgPoolMeanReducer<float>,
- const Eigen::IndexList<Eigen::type2index<1> >,
- const TensorReshapingOp<
- const Eigen::DSizes<DenseIndex, 3>,
- const TensorVolumePatchOp<Dynamic, Dynamic, Dynamic, const Input> > > >
-#endif
-CuboidAvgPooling(const Input& input, DenseIndex patchPlanes,
- DenseIndex patchRows, DenseIndex patchCols,
- DenseIndex stridePlanes, DenseIndex strideRows,
- DenseIndex strideCols, const PaddingType padding_type) {
- EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 5, YOU_MADE_A_PROGRAMMING_MISTAKE);
- static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
-
- typedef typename internal::traits<Input>::Index TensorIndex;
- TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input);
-
- static const int idxPlanes = isColMajor ? 1 : 3;
- static const int idxRows = 2;
- static const int idxCols = isColMajor ? 3 : 1;
-  // Molds the output of the reduction into the shape expected by the user
- // (assuming col-major):
- // - 1st dim: channels
-  // - 2nd dim: output depth
- // - 3rd dim: output height
- // - 4th dim: output width
- // - 5th dim and beyond: everything else including batch size
- Eigen::DSizes<DenseIndex, internal::traits<Input>::NumDimensions> post_reduce_dims;
- post_reduce_dims[0] = in.dimension(0);
- if (padding_type == PADDING_VALID) {
- post_reduce_dims[idxPlanes] = numext::ceil((in.dimension(idxPlanes) - patchPlanes + 1.f) / static_cast<float>(stridePlanes));
- post_reduce_dims[idxRows] = numext::ceil((in.dimension(idxRows) - patchRows + 1.f) / static_cast<float>(strideRows));
- post_reduce_dims[idxCols] = numext::ceil((in.dimension(idxCols) - patchCols + 1.f) / static_cast<float>(strideCols));
- } else {
- post_reduce_dims[idxPlanes] = numext::ceil(in.dimension(idxPlanes) / static_cast<float>(stridePlanes));
- post_reduce_dims[idxRows] = numext::ceil(in.dimension(idxRows) / static_cast<float>(strideRows));
- post_reduce_dims[idxCols] = numext::ceil(in.dimension(idxCols) / static_cast<float>(strideCols));
- }
- post_reduce_dims[4] = in.dimension(4);
-
- Eigen::DSizes<DenseIndex, 3> pre_reduce_dims;
- pre_reduce_dims[1] = patchRows * patchCols * patchPlanes;
- if (isColMajor) {
- pre_reduce_dims[0] = post_reduce_dims[0];
- pre_reduce_dims[2] = post_reduce_dims[1] * post_reduce_dims[2] * post_reduce_dims[3] * post_reduce_dims[4];
- } else {
- pre_reduce_dims[0] = post_reduce_dims[0] * post_reduce_dims[1] * post_reduce_dims[2] * post_reduce_dims[3];
- pre_reduce_dims[2] = post_reduce_dims[4];
- }
-
- typedef typename internal::remove_const<typename internal::traits<Input>::Scalar>::type CoeffReturnType;
- internal::AvgPoolMeanReducer<CoeffReturnType> mean_with_nan;
-
-#if !defined(EIGEN_HAS_INDEX_LIST)
- // nvcc doesn't support cxx11
- Eigen::array<int, 1> reduction_dims;
- reduction_dims[0] = 1;
-#else
- // Take advantage of cxx11 to give the compiler information it can use to
- // optimize the code.
- Eigen::IndexList<Eigen::type2index<1> > reduction_dims;
-#endif
- return input.extract_volume_patches(patchPlanes, patchRows, patchCols,
- stridePlanes, strideRows, strideCols,
- padding_type, -Eigen::NumTraits<float>::highest())
- .reshape(pre_reduce_dims)
- .reduce(reduction_dims, mean_with_nan)
- .reshape(post_reduce_dims);
-}
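
And a matching sketch for the cuboid variant (hypothetical sizes):

    // 3x3x3 windows, stride 1, SAME padding -> dimensions unchanged.
    Eigen::Tensor<float, 5> vol(4, 8, 16, 16, 2);
    vol.setRandom();
    Eigen::Tensor<float, 5> avg =
        Eigen::CuboidAvgPooling(vol, 3, 3, 3, 1, 1, 1, Eigen::PADDING_SAME);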
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_NEURAL_NETWORKS_POOLING_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/SoftMax.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/SoftMax.h
deleted file mode 100644
index f0e21ab9c2..0000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/SoftMax.h
+++ /dev/null
@@ -1,83 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef EIGEN_CXX11_NEURAL_NETWORKS_SOFTMAX_H
-#define EIGEN_CXX11_NEURAL_NETWORKS_SOFTMAX_H
-
-namespace Eigen {
-
-/** SoftMax
- * \ingroup CXX11_NeuralNetworks_Module
- *
- * \brief Applies a softmax
- *
- * The input parameter is expected to be a col-major tensor with a rank of 2 (depth and other).
- *
- * The result can be assigned to a tensor of rank and dimensions equal to that of the input. The result will be laid out in col-major order.
- *
-*/
-
-namespace {
-class SoftmaxOp {
- public:
- EIGEN_ALWAYS_INLINE SoftmaxOp(const float beta) : beta_(beta) { }
-
- template <typename Input> EIGEN_ALWAYS_INLINE
- typename Input::Dimensions dimensions(const Input& input) const {
- return input.dimensions();
- }
-
- template <typename Input, typename Output, typename Device>
- void eval(const Input& input, Output& output, const Device& device) const
- {
-#if !defined(EIGEN_HAS_INDEX_LIST)
- // nvcc doesn't support cxx11
- Eigen::array<typename internal::traits<Input>::Index, 1> depth_dim;
- depth_dim[0] = 0;
- Eigen::array<typename internal::traits<Input>::Index, 2> bcast;
- bcast[0] = dimensions(input)[0];
- bcast[1] = 1;
- DSizes<typename internal::traits<Input>::Index, 2> dims2d;
- dims2d[0] = 1;
- dims2d[1] = dimensions(input)[1];
-#else
- // Take advantage of cxx11 to give the compiler information it can use to
- // optimize the code.
- Eigen::IndexList<Eigen::type2index<0>> depth_dim;
- Eigen::IndexList<int, Eigen::type2index<1>> bcast;
- bcast.set(0, dimensions(input)[0]);
- Eigen::IndexList<Eigen::type2index<1>, typename internal::traits<Input>::Index> dims2d;
- dims2d.set(1, dimensions(input)[1]);
-#endif
-
- output.device(device) = ((input - input.maximum(depth_dim).eval().reshape(dims2d).broadcast(bcast)) * beta_).exp();
- output.device(device) = output / (output.sum(depth_dim).eval().reshape(dims2d).broadcast(bcast));
- }
-
- private:
- const float beta_;
-};
-}
-
-
-template <typename Input>
-EIGEN_ALWAYS_INLINE
-static const TensorCustomUnaryOp<const SoftmaxOp, const Input>
-SoftMax(const Input& input, const float beta)
-{
- EIGEN_STATIC_ASSERT(internal::traits<Input>::Layout == ColMajor, YOU_MADE_A_PROGRAMMING_MISTAKE);
- EIGEN_STATIC_ASSERT(internal::traits<Input>::NumDimensions == 2, YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- const SoftmaxOp op(beta);
- return input.customOp(op);
-}
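
A usage sketch (the input must be a rank-2, col-major tensor, as the static asserts above enforce; sizes hypothetical):

    // Softmax along the depth dimension of a (depth, batch) tensor.
    Eigen::Tensor<float, 2> logits(10, 32);
    logits.setRandom();
    Eigen::Tensor<float, 2> probs = Eigen::SoftMax(logits, /*beta=*/1.0f);
    // Each column of probs now sums to 1.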
-
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_NEURAL_NETWORKS_SOFTMAX_H
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/SpatialConvolutions.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/SpatialConvolutions.h
deleted file mode 100644
index 8e2ddca6b5..0000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/SpatialConvolutions.h
+++ /dev/null
@@ -1,775 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#ifndef EIGEN_CXX11_NEURAL_NETWORKS_SPATIAL_CONVOLUTIONS_H
-#define EIGEN_CXX11_NEURAL_NETWORKS_SPATIAL_CONVOLUTIONS_H
-
-namespace Eigen {
-
-namespace internal {
-
-// These optimizations require vector instructions
-#ifdef EIGEN_VECTORIZE
-
-// TODO: Consolidate this part of the code with the image patch extraction code
-// since they are both very similar.
-template <typename NewDimension, DenseIndex Rows, DenseIndex Cols, typename ArgType, typename Device,
- typename Scalar_, typename Index,
- typename nocontract_t, typename contract_t,
- int Side, size_t packet_size,
- bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
-class TensorContractionInputMapper<Scalar_, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
-{
- public:
- typedef TensorContractionInputMapper<Scalar_, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Self;
- typedef TensorContractionSubMapper<Scalar_, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper;
- typedef SubMapper VectorMapper;
- typedef SubMapper LinearMapper;
- typedef Scalar_ Scalar;
- typedef typename packet_traits<Scalar>::type Packet;
-
- TensorContractionInputMapper(const TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>& tensor,
- const nocontract_t&, const nocontract_t&,
- const contract_t&, const contract_t&)
- : m_impl(tensor.impl().impl())
- {
- Index patch_rows;
- Index patch_depth;
- if (internal::traits<ArgType>::Layout == ColMajor) {
- patch_depth = tensor.impl().dimensions()[0];
- patch_rows = tensor.impl().dimensions()[1];
- m_patch_cols = tensor.impl().dimensions()[2];
- m_num_patches = tensor.impl().dimensions()[3];
- } else {
- static const int NumDims = tensor.impl().dimensions().size();
- patch_depth = tensor.impl().dimensions()[NumDims - 1];
- patch_rows = tensor.impl().dimensions()[NumDims - 2];
- m_patch_cols = tensor.impl().dimensions()[NumDims - 3];
- m_num_patches = tensor.impl().dimensions()[NumDims - 4];
- }
- m_patch_row_inflate_strides = tensor.impl().rowInflateStride();
- m_patch_col_inflate_strides = tensor.impl().colInflateStride();
-
- m_colStride = patch_rows;
-
- m_outputRows = tensor.impl().outputRows();
- m_row_strides = tensor.impl().userRowStride();
- m_col_strides = tensor.impl().userColStride();
-
- m_in_row_strides = tensor.impl().userInRowStride();
- m_in_col_strides = tensor.impl().userInColStride();
-
- if (internal::traits<ArgType>::Layout == ColMajor) {
- m_inputRows = tensor.impl().impl().dimensions()[1];
- m_inputCols = tensor.impl().impl().dimensions()[2];
- } else {
- static const int NumDims = tensor.impl().impl().dimensions().size();
- m_inputRows = tensor.impl().impl().dimensions()[NumDims - 2];
- m_inputCols = tensor.impl().impl().dimensions()[NumDims - 3];
- }
-
- m_rowInputStride = patch_depth;
- m_colInputStride = patch_depth * m_inputRows;
- m_patchInputStride = patch_depth * m_inputRows * m_inputCols;
-
- m_rowPaddingTop = tensor.impl().rowPaddingTop();
- m_colPaddingLeft = tensor.impl().colPaddingLeft();
-
- m_fastInputRowStride = internal::TensorIntDivisor<Index>(m_patch_row_inflate_strides);
- m_fastInputColStride = internal::TensorIntDivisor<Index>(m_patch_col_inflate_strides);
- m_fastNumPatches = internal::TensorIntDivisor<Index>(m_num_patches);
- m_fastColStride = internal::TensorIntDivisor<Index>(m_colStride);
- m_fastOutputRows = internal::TensorIntDivisor<Index>(m_outputRows);
- m_fastDimZero = internal::TensorIntDivisor<Index>(patch_depth);
- }
-
- TensorContractionInputMapper(const TensorContractionInputMapper& base_mapper) :
- m_impl(base_mapper.m_impl) {
- m_patch_cols = base_mapper.m_patch_cols;
- m_num_patches = base_mapper.m_num_patches;
- m_patch_row_inflate_strides = base_mapper.m_patch_row_inflate_strides;
- m_patch_col_inflate_strides = base_mapper.m_patch_col_inflate_strides;
-
- m_colStride = base_mapper.m_colStride;
-
- m_rowInputStride = base_mapper.m_rowInputStride;
- m_colInputStride = base_mapper.m_colInputStride;
- m_patchInputStride = base_mapper.m_patchInputStride;
-
- m_inputRows = base_mapper.m_inputRows;
- m_inputCols = base_mapper.m_inputCols;
-
- m_outputRows = base_mapper.m_outputRows;
- m_row_strides = base_mapper.m_row_strides;
- m_col_strides = base_mapper.m_col_strides;
-
- m_in_row_strides = base_mapper.m_in_row_strides;
- m_in_col_strides = base_mapper.m_in_col_strides;
-
- m_rowPaddingTop = base_mapper.m_rowPaddingTop;
- m_colPaddingLeft = base_mapper.m_colPaddingLeft;
-
- m_fastInputRowStride = base_mapper.m_fastInputRowStride;
- m_fastInputColStride = base_mapper.m_fastInputColStride;
- m_fastNumPatches = base_mapper.m_fastNumPatches;
- m_fastColStride = base_mapper.m_fastColStride;
- m_fastOutputRows = base_mapper.m_fastOutputRows;
- m_fastDimZero = base_mapper.m_fastDimZero;
- }
-
- // If true, turns off some optimizations for loading packets since the image
-  // patches are "non-standard", e.g. there are non-trivial strides or
- // inflations in the input.
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE bool nonStandardPatches() const {
- return m_in_row_strides != 1 || m_in_col_strides != 1 || m_patch_row_inflate_strides != 1 || m_patch_col_inflate_strides != 1;
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE SubMapper getSubMapper(Index i, Index j) const {
- return SubMapper(*this, i, j);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
- return LinearMapper(*this, i, j);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE Scalar operator()(Index row) const {
- Index rowIndex, colIndex, otherIndex;
- computeBaseIndices(0, rowIndex, colIndex, otherIndex);
- return loadCoeff(row, rowIndex, colIndex, otherIndex);
- }
-
- // Load the coefficient at the patchIndex location instead of the usual m_rowIndex,
-  // m_colIndex, m_otherIndex. This is currently only used by the gpu code.
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar operator()(Index row, Index patchIndex) const {
- Index rowIndex, colIndex, otherIndex;
- computeBaseIndices(patchIndex, rowIndex, colIndex, otherIndex);
- return loadCoeff(row, rowIndex, colIndex, otherIndex);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE Packet loadPacket(Index row) const {
- Index rowIndex, colIndex, otherIndex;
- computeBaseIndices(0, rowIndex, colIndex, otherIndex);
- return loadPacket(row, rowIndex, colIndex, otherIndex);
- }
-
- // Load the packet at the patchIndex location instead of the usual m_rowIndex,
- // m_colIndex, m_otherIndex. This is currently only used by the gpu code.
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE Packet loadPacket(Index row, Index patchIndex) const {
- Index rowIndex, colIndex, otherIndex;
- computeBaseIndices(patchIndex, rowIndex, colIndex, otherIndex);
- return loadPacket(row, rowIndex, colIndex, otherIndex);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }
-
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE Index patchDepth() const { return m_rowInputStride; }
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE Index patchRows() const { return m_colStride; }
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE Index patchCols() const { return m_patch_cols; }
-
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE Packet packetNoPadding(const Index depth, const Index baseIndex) const {
- const Index inputIndex = depth + baseIndex;
- return m_impl.template packet<Unaligned>(inputIndex);
- }
-
- private:
- friend class TensorContractionSubMapper<Scalar, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>;
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar loadCoeff(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const {
- // Find the offset of the element wrt the location of the first element.
- const Index patchOffset = patchId / m_fastDimZero;
-
- const Index colOffset = patchOffset / m_fastColStride;
- const Index inputCol = colIndex + colOffset * m_in_col_strides;
- const Index origInputCol = (m_patch_col_inflate_strides == 1) ? inputCol : ((inputCol >= 0) ? (inputCol / m_fastInputColStride) : 0);
- const Index rowOffset = patchOffset - colOffset * m_colStride;
- const Index inputRow = rowIndex + rowOffset * m_in_row_strides;
- const Index origInputRow = (m_patch_row_inflate_strides == 1) ? inputRow : ((inputRow >= 0) ? (inputRow / m_fastInputRowStride) : 0);
- if (origInputCol < 0 | origInputRow < 0 | origInputCol >= m_inputCols | origInputRow >= m_inputRows |
- (inputCol != origInputCol * m_patch_col_inflate_strides) | (inputRow != origInputRow * m_patch_row_inflate_strides)) {
- return Scalar(0);
- }
- const Index depth = patchId - patchOffset * patchDepth();
- const Index inputIndex = depth + origInputRow * m_rowInputStride + origInputCol * m_colInputStride + otherIndex;
- return m_impl.coeff(inputIndex);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_STRONG_INLINE Scalar loadCoeffStandard(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const {
- eigen_assert(!nonStandardPatches());
-
- // Find the offset of the element wrt the location of the first element.
- const Index patchOffset = patchId / m_fastDimZero;
-
- const Index colOffset = patchOffset / m_fastColStride;
- const Index inputCol = colIndex + colOffset;
- const Index rowOffset = patchOffset - colOffset * m_colStride;
- const Index inputRow = rowIndex + rowOffset;
- if (inputCol < 0 || inputCol >= m_inputCols || inputRow < 0 || inputRow >= m_inputRows) {
- return Scalar(0);
- }
- const Index depth = patchId - patchOffset * patchDepth();
- const Index inputIndex = depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex;
- return m_impl.coeff(inputIndex);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE Packet loadPacket(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const {
- const Index packetSize = internal::unpacket_traits<Packet>::size;
- EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(patchId < patchDepth()*patchRows()*m_patch_cols);
-
- if (nonStandardPatches()) {
- return packetWithPossibleZero(patchId, rowIndex, colIndex, otherIndex);
- }
- return loadPacketStandard(patchId, rowIndex, colIndex, otherIndex);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE Packet loadPacketStandard(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const {
- const Index packetSize = internal::unpacket_traits<Packet>::size;
- EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(patchId < patchDepth()*patchRows()*m_patch_cols);
-
- eigen_assert(!nonStandardPatches());
-
- if ((patchDepth() % packetSize) == 0) {
- return loadPacketFast(patchId, rowIndex, colIndex, otherIndex);
- }
- else {
- const Index patchOffsets[2] = {patchId / m_fastDimZero, (patchId + packetSize - 1) / m_fastDimZero};
-
- const Index colOffsets[2] = {patchOffsets[0] / m_fastColStride, patchOffsets[1] / m_fastColStride};
-
- const Index inputCols[2] = {colIndex + colOffsets[0], colIndex + colOffsets[1]};
- if (inputCols[0] >= m_inputCols | inputCols[1] < 0) {
- // all zeros
- return internal::pset1<Packet>(Scalar(0));
- }
-
- if (inputCols[0] == inputCols[1]) {
- const Index rowOffsets[2] = {patchOffsets[0] - colOffsets[0]*m_colStride, patchOffsets[1] - colOffsets[1]*m_colStride};
- eigen_assert(rowOffsets[0] <= rowOffsets[1]);
- const Index inputRows[2] = {rowIndex + rowOffsets[0], rowIndex + rowOffsets[1]};
-
- if (inputRows[0] >= m_inputRows | inputRows[1] < 0) {
- // all zeros
- return internal::pset1<Packet>(Scalar(0));
- }
-
- if (inputRows[0] >= 0 & inputRows[1] < m_inputRows) {
- // no padding
- const Index depth = patchId - patchOffsets[0] * patchDepth();
- const Index inputIndex = depth + inputRows[0] * m_rowInputStride + inputCols[0] * m_colInputStride + otherIndex;
- return m_impl.template packet<Unaligned>(inputIndex);
- }
- }
- }
- return packetWithPossibleZero(patchId, rowIndex, colIndex, otherIndex);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE Packet loadPacketFast(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const {
- const Index packetSize = internal::unpacket_traits<Packet>::size;
- EIGEN_STATIC_ASSERT(packetSize > 1, YOU_MADE_A_PROGRAMMING_MISTAKE)
- eigen_assert(patchId < patchDepth()*patchRows()*m_patch_cols);
-
- eigen_assert(!nonStandardPatches());
- eigen_assert((patchDepth() % packetSize) == 0);
- // Find the offset of the element wrt the location of the first element.
- const Index patchOffset = patchId / m_fastDimZero;
- eigen_assert((patchId + packetSize - 1) / m_fastDimZero == patchOffset);
-
- const Index colOffset = patchOffset / m_fastColStride;
- const Index inputCol = colIndex + colOffset;
- const Index rowOffset = patchOffset - colOffset*m_colStride;
- const Index inputRow = rowIndex + rowOffset;
- if (inputCol < 0 | inputRow < 0 | inputCol >= m_inputCols | inputRow >= m_inputRows) {
- // all zeros
- return internal::pset1<Packet>(Scalar(0));
- }
- // no padding
- const Index depth = patchId - patchOffset * patchDepth();
- const Index inputIndex = depth + inputRow * m_rowInputStride + inputCol * m_colInputStride + otherIndex;
- return m_impl.template packet<Unaligned>(inputIndex);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet packetWithPossibleZero(Index patchId, Index rowIndex, Index colIndex, Index otherIndex) const
- {
- const int packetSize = internal::unpacket_traits<Packet>::size;
- EIGEN_ALIGN_MAX typename internal::remove_const<Scalar>::type values[packetSize];
- for (int i = 0; i < packetSize; ++i) {
- values[i] = loadCoeff(patchId+i, rowIndex, colIndex, otherIndex);
- }
- Packet rslt = internal::pload<Packet>(values);
- return rslt;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void computeBaseIndices(Index patchIndex, Index& rowIndex, Index& colIndex, Index& otherIndex) const {
- const int NumInputDims = array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
- otherIndex = (NumInputDims == 3) ? 0 : patchIndex / m_fastNumPatches;
- const Index patch2DIndex = (NumInputDims == 3) ? patchIndex : (patchIndex - otherIndex * m_num_patches);
- otherIndex *= m_patchInputStride;
- colIndex = patch2DIndex / m_fastOutputRows;
- rowIndex = patch2DIndex - colIndex * m_outputRows;
- colIndex = colIndex * m_col_strides - m_colPaddingLeft;
- rowIndex = rowIndex * m_row_strides - m_rowPaddingTop;
- }
-
-  Index m_patch_cols;                 // number of columns in the patch
- Index m_num_patches; // number of patches to extract.
- Index m_patch_row_inflate_strides; // the strides for row inflation in the image patch
- Index m_patch_col_inflate_strides; // the strides for col inflation in the image patch
- // Fast representation of inflation strides.
- internal::TensorIntDivisor<Index> m_fastInputRowStride;
- internal::TensorIntDivisor<Index> m_fastInputColStride;
-
- Index m_otherStride;
- Index m_colStride;
- internal::TensorIntDivisor<Index> m_fastNumPatches;
- internal::TensorIntDivisor<Index> m_fastColStride;
-
- Index m_rowInputStride; // row stride in the input tensor
- Index m_colInputStride; // col stride in the input tensor
- Index m_patchInputStride; // patch stride in the input tensor
-
- Index m_inputRows; // Number of rows in the input tensor
- Index m_inputCols; // Number of cols in the input tensor
-
- Index m_outputRows; // Number of patch rows
-
- Index m_row_strides; // User specified row stride
- Index m_col_strides; // User specified col stride
-
- Index m_in_row_strides; // User specified input row stride
- Index m_in_col_strides; // User specified input col stride
-
- Index m_rowPaddingTop; // Row padding
- Index m_colPaddingLeft; // Column padding
-
- internal::TensorIntDivisor<Index> m_fastOutputRows;
- internal::TensorIntDivisor<Index> m_fastDimZero;
-
- const TensorEvaluator<ArgType, Device> m_impl;
-};
-
-
-template <typename NewDimension, DenseIndex Rows, DenseIndex Cols, typename ArgType, typename Device,
- typename Scalar_, typename Index,
- typename nocontract_t, typename contract_t,
- int Side, size_t packet_size,
- bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
-class TensorContractionSubMapper<Scalar_, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>
-{
- public:
- typedef Scalar_ Scalar;
- typedef typename packet_traits<Scalar>::type Packet;
- typedef typename packet_traits<Scalar>::half HalfPacket;
-
- typedef TensorContractionInputMapper<Scalar, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> ParentMapper;
- typedef TensorContractionSubMapper<Scalar, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> Self;
- typedef Self LinearMapper;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionSubMapper(const ParentMapper& base_mapper, Index vert_offset, Index horiz_offset)
- : m_base_mapper(base_mapper), m_depth_offset(vert_offset), m_col_offset(horiz_offset) {
- m_base_mapper.computeBaseIndices(m_col_offset, m_rowIndex, m_colIndex, m_otherIndex);
- }
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorContractionSubMapper(const Self& base_mapper, Index vert_offset, Index horiz_offset)
- : m_base_mapper(base_mapper.m_base_mapper), m_depth_offset(vert_offset+base_mapper.m_depth_offset), m_col_offset(horiz_offset+base_mapper.m_col_offset) {
- m_base_mapper.computeBaseIndices(m_col_offset, m_rowIndex, m_colIndex, m_otherIndex);
- }
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i) const {
- return m_base_mapper.loadCoeff(i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex);
- }
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar operator()(Index i, Index j) const {
- return m_base_mapper(i + m_depth_offset, j + m_col_offset);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i) const {
- return m_base_mapper.loadPacket(i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex);
- }
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacket(Index i, Index j) const {
- return m_base_mapper.template loadPacket(i + m_depth_offset, j + m_col_offset);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar loadCoeffStandard(Index i) const {
- return m_base_mapper.loadCoeffStandard(i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex);
- }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacketFast(Index i) const {
- return m_base_mapper.loadPacketFast(i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex);
- }
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet loadPacketStandard(Index i) const {
- return m_base_mapper.loadPacketStandard(i + m_depth_offset, m_rowIndex, m_colIndex, m_otherIndex);
- }
- template <typename Packet>
- EIGEN_DEVICE_FUNC bool aligned(Index) const {
- return false;
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE bool nonStandardPatches() const {
- return m_base_mapper.nonStandardPatches();
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE Index patchDepth() const { return m_base_mapper.m_rowInputStride; }
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE Index patchRows() const { return m_base_mapper.m_colStride; }
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE Index patchCols() const { return m_base_mapper.m_patch_cols; }
-
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE Packet packetNoPadding(const Index depth, const Index baseIndex) const {
- const Index inputIndex = depth + baseIndex;
- return m_base_mapper.m_impl.template packet<Unaligned>(inputIndex);
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE bool padRow(const Index row) const {
- const Index r = m_rowIndex + row;
- return r < 0 | r >= m_base_mapper.m_inputRows;
- }
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE bool padCol(const Index col) const {
- const Index c = m_colIndex + col;
- return c < 0 | c >= m_base_mapper.m_inputCols;
- }
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE Index baseIndex(const Index row, const Index col) const {
- const Index r = m_rowIndex + row;
- const Index c = m_colIndex + col;
- return r * m_base_mapper.m_rowInputStride + c * m_base_mapper.m_colInputStride + m_otherIndex;
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE Index rowOffset() const {
- const Index patchOffset = m_depth_offset / m_base_mapper.m_fastDimZero;
- const Index colOffset = patchOffset / m_base_mapper.m_fastColStride;
- return patchOffset-colOffset*m_base_mapper.m_colStride;
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE Index colOffset() const {
- const Index patchOffset = m_depth_offset / m_base_mapper.m_fastDimZero;
- const Index colOffset = patchOffset / m_base_mapper.m_fastColStride;
- return colOffset;
- }
-
- EIGEN_DEVICE_FUNC
- EIGEN_ALWAYS_INLINE Index depthOffset() const {
- const Index patchOffset = m_depth_offset % m_base_mapper.patchDepth();
- return patchOffset;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE LinearMapper getLinearMapper(Index i, Index j) const {
- return LinearMapper(m_base_mapper, i + m_depth_offset, j + m_col_offset);
- }
-
- private:
- const ParentMapper& m_base_mapper; // that was a reference before
- Index m_depth_offset; // First row in the input matrix
- Index m_col_offset; // First col in the input matrix
-
- Index m_rowIndex; // precomputed row index corresponding to the col offset
- Index m_colIndex; // precomputed col index corresponding to the col offset
- Index m_otherIndex; // precomputed other index corresponding to the col offset
-
-};
-
-
-template <typename NewDimension, DenseIndex Rows, DenseIndex Cols, typename ArgType, typename Device,
- typename Scalar, typename Index,
- typename nocontract_t, typename contract_t,
- int Side, size_t packet_size,
- bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment, int nr>
-struct gemm_pack_rhs<Scalar, Index, TensorContractionSubMapper<Scalar, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment>, nr, ColMajor, false, false> {
-
- typedef TensorContractionSubMapper<Scalar, Index, Side, TensorEvaluator<const TensorReshapingOp<NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >, Device>, nocontract_t, contract_t, packet_size, inner_dim_contiguous, inner_dim_reordered, Alignment> SubMapper;
- typedef SubMapper DataMapper;
-
- static inline Index ceil_div(Index a, Index b) {
- return (a + b - 1) / b;
- }
-
- EIGEN_DONT_INLINE void operator()(Scalar* block, const DataMapper& rhs, Index depth, Index cols, Index stride=0, Index offset=0) const {
- eigen_assert(stride == 0);
- eigen_assert(offset == 0);
-
- EIGEN_STATIC_ASSERT((nr == 4), YOU_MADE_A_PROGRAMMING_MISTAKE);
- typedef typename DataMapper::LinearMapper LinearMapper;
- typedef typename packet_traits<Scalar>::type Packet;
-
- const Index packet_cols4 = (cols/4) * 4;
- const Index peeled_k = (depth/packet_size) * packet_size;
- const bool non_standard_patches = rhs.nonStandardPatches();
-
- for(Index j2=0; j2<packet_cols4; j2+=4)
- {
- const SubMapper dm0 = rhs.getLinearMapper(0, j2 + 0);
- const SubMapper dm1 = rhs.getLinearMapper(0, j2 + 1);
- const SubMapper dm2 = rhs.getLinearMapper(0, j2 + 2);
- const SubMapper dm3 = rhs.getLinearMapper(0, j2 + 3);
-
- Index k=0;
- if((packet_size%4)==0 && !non_standard_patches)
- {
- const Index patch_depth = rhs.patchDepth();
- if ((patch_depth % packet_size) == 0) {
- const Index patch_cols = rhs.patchCols();
- const Index patch_rows = rhs.patchRows();
-
- const Index startCol = rhs.colOffset();
- const Index max_cols = std::min<Index>(ceil_div(peeled_k, patch_rows*patch_depth)+startCol, patch_cols);
-
- for (Index c = startCol; c < max_cols; ++c) {
- eigen_assert(k < peeled_k);
- const Index startRow = (c == startCol) ? rhs.rowOffset() : 0;
- const Index max_rows = std::min<Index>(ceil_div(peeled_k-c*patch_rows*patch_depth, patch_depth)+startRow, patch_rows);
-
- const bool pad_col0 = dm0.padCol(c);
- const bool pad_col1 = dm1.padCol(c);
- const bool pad_col2 = dm2.padCol(c);
- const bool pad_col3 = dm3.padCol(c);
- for (Index r = startRow; r < max_rows; ++r) {
- eigen_assert(k < peeled_k);
- const bool pad0 = pad_col0 || dm0.padRow(r);
- const bool pad1 = pad_col1 || dm1.padRow(r);
- const bool pad2 = pad_col2 || dm2.padRow(r);
- const bool pad3 = pad_col3 || dm3.padRow(r);
-
- const Index idx0 = dm0.baseIndex(r, c);
- const Index idx1 = dm1.baseIndex(r, c);
- const Index idx2 = dm2.baseIndex(r, c);
- const Index idx3 = dm3.baseIndex(r, c);
-
- const Index startDepth = ((c == startCol) && (r == startRow)) ? rhs.depthOffset() : 0;
- const Index max_depth = std::min<Index>(peeled_k-c*patch_rows*patch_depth-r*patch_depth+startDepth, patch_depth);
- eigen_assert(max_depth % packet_size == 0);
- for (Index d = startDepth; d < max_depth; d += packet_size) {
- eigen_assert(k < peeled_k);
- PacketBlock<Packet, 4> kernel;
- kernel.packet[0] = pad0 ? pset1<Packet>(0) : rhs.packetNoPadding(d, idx0);
- kernel.packet[1] = pad1 ? pset1<Packet>(0) : rhs.packetNoPadding(d, idx1);
- kernel.packet[2] = pad2 ? pset1<Packet>(0) : rhs.packetNoPadding(d, idx2);
- kernel.packet[3] = pad3 ? pset1<Packet>(0) : rhs.packetNoPadding(d, idx3);
- ptranspose(kernel);
- pstoreu(block+0*packet_size, kernel.packet[0]);
- pstoreu(block+1*packet_size, kernel.packet[1]);
- pstoreu(block+2*packet_size, kernel.packet[2]);
- pstoreu(block+3*packet_size, kernel.packet[3]);
- block+=4*packet_size;
- k += packet_size;
- }
- }
- }
-
- for(; k<peeled_k; k+=packet_size) {
- PacketBlock<Packet, 4> kernel;
- kernel.packet[0] = dm0.loadPacketFast(k);
- kernel.packet[1] = dm1.loadPacketFast(k);
- kernel.packet[2] = dm2.loadPacketFast(k);
- kernel.packet[3] = dm3.loadPacketFast(k);
- ptranspose(kernel);
- pstoreu(block+0*packet_size, kernel.packet[0]);
- pstoreu(block+1*packet_size, kernel.packet[1]);
- pstoreu(block+2*packet_size, kernel.packet[2]);
- pstoreu(block+3*packet_size, kernel.packet[3]);
- block+=4*packet_size;
- }
- }
- else {
- for(; k<peeled_k; k+=packet_size) {
- PacketBlock<Packet, 4> kernel;
- kernel.packet[0] = dm0.loadPacketStandard(k);
- kernel.packet[1] = dm1.loadPacketStandard(k);
- kernel.packet[2] = dm2.loadPacketStandard(k);
- kernel.packet[3] = dm3.loadPacketStandard(k);
- ptranspose(kernel);
- pstoreu(block+0*packet_size, kernel.packet[0]);
- pstoreu(block+1*packet_size, kernel.packet[1]);
- pstoreu(block+2*packet_size, kernel.packet[2]);
- pstoreu(block+3*packet_size, kernel.packet[3]);
- block+=4*packet_size;
- }
- }
- }
- if (!rhs.nonStandardPatches()) {
- for(; k<depth; k++)
- {
- block[0] = dm0.loadCoeffStandard(k);
- block[1] = dm1.loadCoeffStandard(k);
- block[2] = dm2.loadCoeffStandard(k);
- block[3] = dm3.loadCoeffStandard(k);
- block += 4;
- }
- }
- else {
- for(; k<depth; k++)
- {
- block[0] = dm0(k);
- block[1] = dm1(k);
- block[2] = dm2(k);
- block[3] = dm3(k);
- block += 4;
- }
- }
- }
-
- // copy the remaining columns one at a time (nr==1)
- for(Index j2=packet_cols4; j2<cols; ++j2)
- {
- const SubMapper dm0 = rhs.getLinearMapper(0, j2);
- for(Index k=0; k<depth; k++)
- {
- *block = dm0(k);
- block += 1;
- }
- }
- }
-};
-
-#endif // EIGEN_VECTORIZE
-} // end namespace internal
-
-
-/** SpatialConvolution
- * \ingroup CXX11_NeuralNetworks_Module
- *
- * \brief Applies a 2D convolution over a multichannel input image.
- *
- * The input parameter is expected to be a tensor with a rank of 3 or more (channels, height, width, and optionally others)
- * The kernel parameter is expected to be a 4D tensor (filters, channels, kernel_height, kernel_width)
- * The input and the kernel must both be in col-major layout. The result will also be in col-major layout.
- *
- * If in_stride > 1, then applies convolution with holes (aka atrous convolution), sampling every in_stride input pixels.
- *
- * The result can be assigned to a tensor of rank equal to the rank of the input. The dimensions of the result will be filters, height, width (and others if applicable).
- *
- * It is possible to swap the order of the width and height dimensions provided that the same order is used in the input, the kernel, and the output.
- *
- */
-template <typename Input, typename Kernel>
-EIGEN_ALWAYS_INLINE
-static const typename internal::conditional<
- internal::traits<Input>::Layout == ColMajor,
- TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions>, const TensorContractionOp<const array<IndexPair<typename internal::traits<Input>::Index>, 1>, const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>, const Kernel>, const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>, const TensorImagePatchOp<Dynamic, Dynamic, const Input> > > >,
- TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, internal::traits<Input>::NumDimensions>, const TensorContractionOp<const array<IndexPair<typename internal::traits<Input>::Index>, 1>, const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>, const TensorImagePatchOp<Dynamic, Dynamic, const Input> >, const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 2>, const Kernel> > > >::type
-SpatialConvolution(const Input& input, const Kernel& kernel, const DenseIndex stride = 1, const PaddingType padding_type = PADDING_SAME, const DenseIndex in_stride = 1) {
-
- typedef typename internal::traits<Input>::Index TensorIndex;
- TensorRef<Tensor<typename internal::traits<Input>::Scalar, internal::traits<Input>::NumDimensions, internal::traits<Input>::Layout, TensorIndex> > in(input);
- TensorRef<Tensor<typename internal::traits<Kernel>::Scalar, internal::traits<Kernel>::NumDimensions, internal::traits<Kernel>::Layout, TensorIndex> > kern(kernel);
-
- EIGEN_STATIC_ASSERT(internal::traits<Input>::Layout == internal::traits<Kernel>::Layout, YOU_MADE_A_PROGRAMMING_MISTAKE);
- static const bool isColMajor = (internal::traits<Input>::Layout == ColMajor);
-
- static const int NumDims = internal::traits<Input>::NumDimensions;
-
- // Number of filters to apply. This is the same as the output depth of the result
- const TensorIndex kernelFilters = isColMajor ? kern.dimensions()[0] : kern.dimensions()[3];
- // Number of channels. This is the same as the input depth.
- const TensorIndex kernelChannels = isColMajor ? kern.dimensions()[1] : kern.dimensions()[2];
- const TensorIndex kernelRows = isColMajor ? kern.dimensions()[2] : kern.dimensions()[1];
- const TensorIndex kernelCols = isColMajor ? kern.dimensions()[3] : kern.dimensions()[0];
-
- const DenseIndex kernelRowsEff = kernelRows + (kernelRows - 1) * (in_stride - 1);
- const DenseIndex kernelColsEff = kernelCols + (kernelCols - 1) * (in_stride - 1);
-
- array<IndexPair<TensorIndex>, 1> contract_dims;
- contract_dims[0] = IndexPair<TensorIndex>(1, 0);
-
- const TensorIndex InputRows = isColMajor ? in.dimension(1) : in.dimension(NumDims - 2);
- const TensorIndex InputCols = isColMajor ? in.dimension(2) : in.dimension(NumDims - 3);
-
- TensorIndex out_height;
- TensorIndex out_width;
- switch (padding_type) {
- case PADDING_VALID:
- out_height = numext::ceil((InputRows - kernelRowsEff + 1.f) / static_cast<float>(stride));
- out_width = numext::ceil((InputCols - kernelColsEff + 1.f) / static_cast<float>(stride));
- break;
- case PADDING_SAME:
- out_height = numext::ceil(InputRows / static_cast<float>(stride));
- out_width = numext::ceil(InputCols / static_cast<float>(stride));
- break;
- default:
- eigen_assert(false && "unexpected padding");
- }
-
- // Molds the output of the patch extraction code into a 2d tensor:
- // - the first dimension (dims[0]): the patch values to be multiplied with the kernels
- // - the second dimension (dims[1]): everything else
- DSizes<TensorIndex, 2> pre_contract_dims;
- if (isColMajor) {
- pre_contract_dims[0] = kernelChannels * kernelRows * kernelCols;
- pre_contract_dims[1] = out_height * out_width;
- for (int i = 3; i < NumDims; ++i) {
- pre_contract_dims[1] *= in.dimension(i);
- }
- } else {
- pre_contract_dims[1] = kernelChannels * kernelRows * kernelCols;
- pre_contract_dims[0] = out_height * out_width;
- for (int i = 0; i < NumDims - 3; ++i) {
- pre_contract_dims[0] *= in.dimension(i);
- }
- }
-
- // Molds the output of the contraction into the shape expected by the user
- // (assuming this is ColMajor):
- // - 1st dim: kernel filters
- // - 2nd dim: output height
- // - 3rd dim: output width
- // - 4th dim and beyond: everything else including batch size
- DSizes<TensorIndex, NumDims> post_contract_dims;
- if (isColMajor) {
- post_contract_dims[0] = kernelFilters;
- post_contract_dims[1] = out_height;
- post_contract_dims[2] = out_width;
- for (int i = 3; i < NumDims; ++i) {
- post_contract_dims[i] = in.dimension(i);
- }
- } else {
- post_contract_dims[NumDims - 1] = kernelFilters;
- post_contract_dims[NumDims - 2] = out_height;
- post_contract_dims[NumDims - 3] = out_width;
- for (int i = 0; i < NumDims - 3; ++i) {
- post_contract_dims[i] = in.dimension(i);
- }
- }
-
- DSizes<TensorIndex, 2> kernel_dims;
- if (isColMajor) {
- kernel_dims[0] = kernelFilters;
- kernel_dims[1] = kernelChannels * kernelRows * kernelCols;
- } else {
- kernel_dims[0] = kernelChannels * kernelRows * kernelCols;
- kernel_dims[1] = kernelFilters;
- }
- // TODO(yangke): choose() is defined in TensorContraction.h -- consider
- // moving it to somewhere more "common".
- return choose(Cond<internal::traits<Input>::Layout == ColMajor>(),
- kernel.reshape(kernel_dims).contract(input.extract_image_patches(kernelRows, kernelCols, stride, stride, in_stride, in_stride, padding_type).reshape(pre_contract_dims), contract_dims).reshape(post_contract_dims),
- input.extract_image_patches(kernelRows, kernelCols, stride, stride, in_stride, in_stride, padding_type).reshape(pre_contract_dims).contract(kernel.reshape(kernel_dims), contract_dims).reshape(post_contract_dims));
-}
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_NEURAL_NETWORKS_SPATIAL_CONVOLUTIONS_H
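For orientation, the deleted SpatialConvolution helper lowers a 2-D convolution to a single matrix product: extract_image_patches flattens every receptive field, the kernel is reshaped to a 2-D matrix (kernel_dims), and one contraction over pre_contract_dims produces the output, which post_contract_dims reshapes back. A minimal NumPy sketch of the same im2col lowering for one NHWC image with VALID padding (function name and shapes are illustrative, not Eigen API):

import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

def spatial_conv_valid(x, w, stride=1):
    """x: (H, W, C) input; w: (kh, kw, C, F) filters; VALID padding."""
    kh, kw, c, f = w.shape
    # Patch extraction: one (C, kh, kw) window per output position.
    patches = sliding_window_view(x, (kh, kw), axis=(0, 1))[::stride, ::stride]
    oh, ow = patches.shape[:2]  # matches ceil((in - k + 1) / stride) above
    cols = patches.reshape(oh * ow, c * kh * kw)            # pre_contract_dims
    kern = w.transpose(2, 0, 1, 3).reshape(c * kh * kw, f)  # kernel_dims
    return (cols @ kern).reshape(oh, ow, f)                 # post_contract_dims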
diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/TensorConvolutionByFFT.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/TensorConvolutionByFFT.h
deleted file mode 100644
index 0e72173536..0000000000
--- a/third_party/eigen3/unsupported/Eigen/CXX11/src/NeuralNetworks/TensorConvolutionByFFT.h
+++ /dev/null
@@ -1,289 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
-// Copyright (C) 2015 Jianwei Cui <thucjw@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#ifndef EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTIONBYFFT_H
-#define EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTIONBYFFT_H
-
-namespace Eigen {
-
-/** \class TensorConvolutionByFFT
- * \ingroup CXX11_Tensor_Module
- *
- * \brief Tensor convolution evaluated in the frequency domain via FFT.
- */
-namespace internal {
-
-
-template<typename Dimensions, typename InputXprType, typename KernelXprType>
-struct traits<TensorConvolutionByFFTOp<Dimensions, InputXprType, KernelXprType> >
-{
- // Type promotion to handle the case where the types of the lhs and the rhs are different.
- typedef typename promote_storage_type<typename InputXprType::Scalar,
- typename KernelXprType::Scalar>::ret Scalar;
- typedef typename packet_traits<Scalar>::type Packet;
- typedef typename promote_storage_type<typename traits<InputXprType>::StorageKind,
- typename traits<KernelXprType>::StorageKind>::ret StorageKind;
- typedef typename promote_index_type<typename traits<InputXprType>::Index,
- typename traits<KernelXprType>::Index>::type Index;
- typedef typename InputXprType::Nested LhsNested;
- typedef typename KernelXprType::Nested RhsNested;
- typedef typename remove_reference<LhsNested>::type _LhsNested;
- typedef typename remove_reference<RhsNested>::type _RhsNested;
- static const int NumDimensions = traits<InputXprType>::NumDimensions;
- static const int Layout = traits<InputXprType>::Layout;
-
- enum {
- Flags = 0,
- };
-};
-
-template<typename Dimensions, typename InputXprType, typename KernelXprType>
-struct eval<TensorConvolutionByFFTOp<Dimensions, InputXprType, KernelXprType>, Eigen::Dense>
-{
- typedef const TensorConvolutionByFFTOp<Dimensions, InputXprType, KernelXprType>& type;
-};
-
-template<typename Dimensions, typename InputXprType, typename KernelXprType>
-struct nested<TensorConvolutionByFFTOp<Dimensions, InputXprType, KernelXprType>, 1, typename eval<TensorConvolutionByFFTOp<Dimensions, InputXprType, KernelXprType> >::type>
-{
- typedef TensorConvolutionByFFTOp<Dimensions, InputXprType, KernelXprType> type;
-};
-
-} // end namespace internal
-
-
-
-template<typename Indices, typename InputXprType, typename KernelXprType>
-class TensorConvolutionByFFTOp : public TensorBase<TensorConvolutionByFFTOp<Indices, InputXprType, KernelXprType> >
-{
- public:
- typedef typename Eigen::internal::traits<TensorConvolutionByFFTOp>::Scalar Scalar;
- typedef typename Eigen::internal::traits<TensorConvolutionByFFTOp>::Packet Packet;
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
- typedef typename internal::promote_storage_type<typename InputXprType::CoeffReturnType,
- typename KernelXprType::CoeffReturnType>::ret CoeffReturnType;
- typedef typename internal::promote_storage_type<typename InputXprType::PacketReturnType,
- typename KernelXprType::PacketReturnType>::ret PacketReturnType;
- typedef typename Eigen::internal::nested<TensorConvolutionByFFTOp>::type Nested;
- typedef typename Eigen::internal::traits<TensorConvolutionByFFTOp>::StorageKind StorageKind;
- typedef typename Eigen::internal::traits<TensorConvolutionByFFTOp>::Index Index;
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorConvolutionByFFTOp(const InputXprType& input, const KernelXprType& kernel, const Indices& dims)
- : m_input_xpr(input), m_kernel_xpr(kernel), m_indices(dims) {}
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const Indices& indices() const { return m_indices; }
-
- /** \returns the nested expressions */
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const typename internal::remove_all<typename InputXprType::Nested>::type&
- inputExpression() const { return m_input_xpr; }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
- const typename internal::remove_all<typename KernelXprType::Nested>::type&
- kernelExpression() const { return m_kernel_xpr; }
-
- protected:
- typename InputXprType::Nested m_input_xpr;
- typename KernelXprType::Nested m_kernel_xpr;
- const Indices m_indices;
-};
-
-
-template<typename Indices, typename InputArgType, typename KernelArgType, typename Device>
-struct TensorEvaluator<const TensorConvolutionByFFTOp<Indices, InputArgType, KernelArgType>, Device>
-{
- typedef TensorConvolutionByFFTOp<Indices, InputArgType, KernelArgType> XprType;
-
- typedef typename XprType::Scalar Scalar;
- typedef typename XprType::CoeffReturnType CoeffReturnType;
- typedef typename XprType::PacketReturnType PacketReturnType;
-
- typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
-
- static const int NumDims = internal::array_size<typename TensorEvaluator<InputArgType, Device>::Dimensions>::value;
- static const int NumKernelDims = internal::array_size<Indices>::value;
- typedef typename XprType::Index Index;
- typedef DSizes<Index, NumDims> Dimensions;
-
- enum {
- IsAligned = TensorEvaluator<InputArgType, Device>::IsAligned &
- TensorEvaluator<KernelArgType, Device>::IsAligned,
- PacketAccess = false,
- BlockAccess = false,
- Layout = TensorEvaluator<InputArgType, Device>::Layout,
- CoordAccess = false, // to be implemented
- };
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
- : m_inputImpl(op.inputExpression(), device), m_kernelImpl(op.kernelExpression(), device), m_kernelArg(op.kernelExpression()), m_kernel(NULL), m_local_kernel(false), m_device(device)
- {
- EIGEN_STATIC_ASSERT((static_cast<int>(TensorEvaluator<InputArgType, Device>::Layout) == static_cast<int>(TensorEvaluator<KernelArgType, Device>::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE);
-
- const typename TensorEvaluator<InputArgType, Device>::Dimensions& input_dims = m_inputImpl.dimensions();
- const typename TensorEvaluator<KernelArgType, Device>::Dimensions& kernel_dims = m_kernelImpl.dimensions();
-
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- m_inputStride[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- m_inputStride[i] = m_inputStride[i - 1] * input_dims[i - 1];
- }
- } else {
- m_inputStride[NumDims - 1] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- m_inputStride[i] = m_inputStride[i + 1] * input_dims[i + 1];
- }
- }
-
- m_dimensions = m_inputImpl.dimensions();
- if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
- for (int i = 0; i < NumKernelDims; ++i) {
- const Index index = op.indices()[i];
- const Index input_dim = input_dims[index];
- const Index kernel_dim = kernel_dims[i];
- const Index result_dim = input_dim - kernel_dim + 1;
- m_dimensions[index] = result_dim;
- if (i > 0) {
- m_kernelStride[i] = m_kernelStride[i - 1] * kernel_dims[i - 1];
- } else {
- m_kernelStride[0] = 1;
- }
- m_indexStride[i] = m_inputStride[index];
- }
-
- m_outputStride[0] = 1;
- for (int i = 1; i < NumDims; ++i) {
- m_outputStride[i] = m_outputStride[i - 1] * m_dimensions[i - 1];
- }
- } else {
- for (int i = NumKernelDims - 1; i >= 0; --i) {
- const Index index = op.indices()[i];
- const Index input_dim = input_dims[index];
- const Index kernel_dim = kernel_dims[i];
- const Index result_dim = input_dim - kernel_dim + 1;
- m_dimensions[index] = result_dim;
- if (i < NumKernelDims - 1) {
- m_kernelStride[i] = m_kernelStride[i + 1] * kernel_dims[i + 1];
- } else {
- m_kernelStride[NumKernelDims - 1] = 1;
- }
- m_indexStride[i] = m_inputStride[index];
- }
-
- m_outputStride[NumDims - 1] = 1;
- for (int i = NumDims - 2; i >= 0; --i) {
- m_outputStride[i] = m_outputStride[i + 1] * m_dimensions[i + 1];
- }
- }
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }
-
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(Scalar* data) {
- m_inputImpl.evalSubExprsIfNeeded(NULL);
- m_kernelImpl.evalSubExprsIfNeeded(NULL);
-
- typedef typename internal::traits<InputArgType>::Index TensorIndex;
-
- Tensor<Scalar, NumDims, Layout, TensorIndex> input(m_inputImpl.dimensions());
- for (int i = 0; i < m_inputImpl.dimensions().TotalSize(); ++i) {
- input.data()[i] = m_inputImpl.coeff(i);
- }
-
- Tensor<Scalar, NumDims, Layout, TensorIndex> kernel(m_kernelImpl.dimensions());
- for (int i = 0; i < m_kernelImpl.dimensions().TotalSize(); ++i) {
- kernel.data()[i] = m_kernelImpl.coeff(i);
- }
-
- array<std::pair<ptrdiff_t, ptrdiff_t>, NumDims> paddings;
- for (int i = 0; i < NumDims; ++i) {
- paddings[i] = std::make_pair(0, m_inputImpl.dimensions()[i] - m_kernelImpl.dimensions()[i]);
- }
-
- Eigen::array<bool, NumKernelDims> reverse;
- for (int i = 0; i < NumKernelDims; ++i) {
- reverse[i] = true;
- }
-
- Eigen::array<int, NumDims> fft; // indices of the dimensions to transform
- for (int i = 0; i < NumDims; ++i) {
- fft[i] = i;
- }
-
- Eigen::DSizes<TensorIndex, NumDims> slice_offsets;
- for (int i = 0; i < NumDims; ++i) {
- slice_offsets[i] = m_kernelImpl.dimensions()[i] - 1;
- }
-
- Eigen::DSizes<TensorIndex, NumDims> slice_extents;
- for (int i = 0; i < NumDims; ++i) {
- slice_extents[i] = m_inputImpl.dimensions()[i] - m_kernelImpl.dimensions()[i] + 1;
- }
-
- Tensor<Scalar, NumDims, Layout, TensorIndex> kernel_variant = kernel.reverse(reverse).pad(paddings);
- Tensor<std::complex<Scalar>, NumDims, Layout, TensorIndex> kernel_fft = kernel_variant.template fft<Eigen::BothParts, FFT_FORWARD>(fft);
- //Tensor<std::complex<Scalar>, NumDims, Layout|IndexType> kernel_fft = kernel.reverse(reverse).pad(paddings).template fft<2>(fft);
- Tensor<std::complex<Scalar>, NumDims, Layout, TensorIndex> input_fft = input.template fft<Eigen::BothParts, FFT_FORWARD>(fft);
- Tensor<std::complex<Scalar>, NumDims, Layout, TensorIndex> prod = (input_fft * kernel_fft).template fft<Eigen::BothParts, FFT_REVERSE>(fft);
- Tensor<std::complex<Scalar>, NumDims, Layout, TensorIndex> tensor_result = prod.slice(slice_offsets, slice_extents);
-
- for (int i = 0; i < tensor_result.size(); ++i) {
- data[i] = std::real(tensor_result.data()[i]);
- }
- return false;
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
- m_inputImpl.cleanup();
- if (m_local_kernel) {
- m_device.deallocate((void*)m_kernel);
- m_local_kernel = false;
- }
- m_kernel = NULL;
- }
-
- void evalTo(typename XprType::Scalar* buffer) {
- evalSubExprsIfNeeded(NULL);
- for (int i = 0; i < dimensions().TotalSize(); ++i) {
- buffer[i] += coeff(i);
- }
- cleanup();
- }
-
- EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
- {
- CoeffReturnType result = CoeffReturnType(0);
- return result;
- }
-
- EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; }
-
- private:
- array<Index, NumDims> m_inputStride;
- array<Index, NumDims> m_outputStride;
-
- array<Index, NumKernelDims> m_indexStride;
- array<Index, NumKernelDims> m_kernelStride;
- TensorEvaluator<InputArgType, Device> m_inputImpl;
- TensorEvaluator<KernelArgType, Device> m_kernelImpl;
- Dimensions m_dimensions;
-
- KernelArgType m_kernelArg;
- const Scalar* m_kernel;
- bool m_local_kernel;
- const Device& m_device;
-};
-
-} // end namespace Eigen
-
-#endif // EIGEN_CXX11_TENSOR_TENSOR_CONVOLUTIONBYFFT_H
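The evaluator above is a direct use of the convolution theorem: reverse the kernel, zero-pad it to the input shape, multiply the forward transforms, inverse-transform, then slice away the first kernel_dim - 1 entries per dimension, where circular wrap-around contaminates the result. A minimal NumPy sketch of the same scheme (names are illustrative, not Eigen API):

import numpy as np

def fft_valid_conv(x, k):
    """Valid sliding inner product of x and k via FFT (x.shape >= k.shape)."""
    rev = tuple(slice(None, None, -1) for _ in range(k.ndim))
    kf = np.fft.fftn(k[rev], s=x.shape)   # reversed, zero-padded kernel
    xf = np.fft.fftn(x)
    full = np.real(np.fft.ifftn(xf * kf))
    # slice_offsets = k_dim - 1, slice_extents = x_dim - k_dim + 1, as above.
    crop = tuple(slice(kd - 1, xd) for kd, xd in zip(k.shape, x.shape))
    return full[crop]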
diff --git a/third_party/llvm/llvm.autogenerated.BUILD b/third_party/llvm/llvm.autogenerated.BUILD
index c3b9ec4c25..0ac27e26a4 100644
--- a/third_party/llvm/llvm.autogenerated.BUILD
+++ b/third_party/llvm/llvm.autogenerated.BUILD
@@ -1942,7 +1942,7 @@ cc_library(
"include/llvm/BinaryFormat/COFF.h",
"include/llvm/BinaryFormat/MachO.h",
"lib/Support/*.h",
- ] + llvm_support_platform_specific_srcs_glob),
+ ]) + llvm_support_platform_specific_srcs_glob(),
hdrs = glob([
"include/llvm/Support/*.h",
"include/llvm/Support/*.def",
diff --git a/third_party/llvm/llvm.bzl b/third_party/llvm/llvm.bzl
index dfdacafceb..d493a3c476 100644
--- a/third_party/llvm/llvm.bzl
+++ b/third_party/llvm/llvm.bzl
@@ -7,103 +7,143 @@ TODO(chandlerc): Currently this expresses include-based dependencies as
correctly understood by the build system.
"""
+def _dict_add(*dictionaries):
+ """Returns a new `dict` that has all the entries of the given dictionaries.
+
+ If the same key is present in more than one of the input dictionaries, the
+ last of them in the argument list overrides any earlier ones.
+
+ This function is designed to take zero or one arguments as well as multiple
+ dictionaries, so that it follows arithmetic identities and callers can avoid
+ special cases for their inputs: the sum of zero dictionaries is the empty
+ dictionary, and the sum of a single dictionary is a copy of itself.
+
+ Re-implemented here to avoid adding a dependency on skylib.
+
+ Args:
+ *dictionaries: Zero or more dictionaries to be added.
+
+ Returns:
+ A new `dict` that has all the entries of the given dictionaries.
+ """
+ result = {}
+ for d in dictionaries:
+ result.update(d)
+ return result
+
def gentbl(name, tblgen, td_file, td_srcs, tbl_outs, library = True, **kwargs):
- """gentbl() generates tabular code from a table definition file.
-
- Args:
- name: The name of the build rule for use in dependencies.
- tblgen: The binary used to produce the output.
- td_file: The primary table definitions file.
- td_srcs: A list of table definition files included transitively.
- tbl_outs: A list of tuples (opts, out), where each opts is a string of
- options passed to tblgen, and the out is the corresponding output file
- produced.
- library: Whether to bundle the generated files into a library.
- **kwargs: Keyword arguments to pass to subsidiary cc_library() rule.
- """
- if td_file not in td_srcs:
- td_srcs += [td_file]
- includes = []
- for (opts, out) in tbl_outs:
- outdir = out[:out.rindex("/")]
- if outdir not in includes:
- includes.append(outdir)
- rule_suffix = "_".join(opts.replace("-", "_").replace("=", "_").split(" "))
- native.genrule(
- name="%s_%s_genrule" % (name, rule_suffix),
- srcs=td_srcs,
- outs=[out],
- tools=[tblgen],
- message="Generating code from table: %s" % td_file,
- cmd=(("$(location %s) " + "-I external/llvm/include " +
- "-I external/llvm/tools/clang/include " +
- "-I $$(dirname $(location %s)) " + "%s $(location %s) -o $@") % (
- tblgen, td_file, opts, td_file)))
- # For now, all generated files can be assumed to comprise public interfaces.
- # If this is not true, you should specify library = False
- # and list the generated '.inc' files in "srcs".
- if library:
- native.cc_library(name=name, textual_hdrs=[f for (_, f) in tbl_outs],
- includes=includes, **kwargs)
+ """gentbl() generates tabular code from a table definition file.
+
+ Args:
+ name: The name of the build rule for use in dependencies.
+ tblgen: The binary used to produce the output.
+ td_file: The primary table definitions file.
+ td_srcs: A list of table definition files included transitively.
+ tbl_outs: A list of tuples (opts, out), where each opts is a string of
+ options passed to tblgen, and the out is the corresponding output file
+ produced.
+ library: Whether to bundle the generated files into a library.
+ **kwargs: Keyword arguments to pass to subsidiary cc_library() rule.
+ """
+ if td_file not in td_srcs:
+ td_srcs += [td_file]
+ includes = []
+ for (opts, out) in tbl_outs:
+ outdir = out[:out.rindex("/")]
+ if outdir not in includes:
+ includes.append(outdir)
+ rule_suffix = "_".join(opts.replace("-", "_").replace("=", "_").split(" "))
+ native.genrule(
+ name = "%s_%s_genrule" % (name, rule_suffix),
+ srcs = td_srcs,
+ outs = [out],
+ tools = [tblgen],
+ message = "Generating code from table: %s" % td_file,
+ cmd = (("$(location %s) " + "-I external/llvm/include " +
+ "-I external/llvm/tools/clang/include " +
+ "-I $$(dirname $(location %s)) " + "%s $(location %s) -o $@") % (
+ tblgen,
+ td_file,
+ opts,
+ td_file,
+ )),
+ )
+
+ # For now, all generated files can be assumed to comprise public interfaces.
+ # If this is not true, you should specify library = False
+ # and list the generated '.inc' files in "srcs".
+ if library:
+ native.cc_library(
+ name = name,
+ textual_hdrs = [f for (_, f) in tbl_outs],
+ includes = includes,
+ **kwargs
+ )
def llvm_target_cmake_vars(native_arch, target_triple):
- return {
- "LLVM_HOST_TRIPLE": target_triple,
- "LLVM_DEFAULT_TARGET_TRIPLE": target_triple,
- "LLVM_NATIVE_ARCH": native_arch,
- }
+ return {
+ "LLVM_HOST_TRIPLE": target_triple,
+ "LLVM_DEFAULT_TARGET_TRIPLE": target_triple,
+ "LLVM_NATIVE_ARCH": native_arch,
+ }
def _quote(s):
- """Quotes the given string for use in a shell command.
-
- This function double-quotes the given string (in case it contains spaces or
- other special characters) and escapes any special characters (dollar signs,
- double-quotes, and backslashes) that may be present.
-
- Args:
- s: The string to quote.
- Returns:
- An escaped and quoted version of the string that can be passed to a shell
- command.
- """
- return ('"' +
- s.replace("\\", "\\\\").replace("$", "\\$").replace('"', '\\"') +
- '"')
+ """Quotes the given string for use in a shell command.
+
+ This function double-quotes the given string (in case it contains spaces or
+ other special characters) and escapes any special characters (dollar signs,
+ double-quotes, and backslashes) that may be present.
+
+ Args:
+ s: The string to quote.
+
+ Returns:
+ An escaped and quoted version of the string that can be passed to a shell
+ command.
+ """
+ return ('"' +
+ s.replace("\\", "\\\\").replace("$", "\\$").replace('"', '\\"') +
+ '"')
def cmake_var_string(cmake_vars):
- """Converts a dictionary to an input suitable for expand_cmake_vars.
+ """Converts a dictionary to an input suitable for expand_cmake_vars.
+
+ Ideally we would just stringify in the expand_cmake_vars() rule, but select()
+ interacts badly with genrules.
- Ideally we would jist stringify in the expand_cmake_vars() rule, but select()
- interacts badly with genrules.
+ TODO(phawkins): replace the genrule() with native rule and delete this rule.
- TODO(phawkins): replace the genrule() with native rule and delete this rule.
+ Args:
+ cmake_vars: a dictionary with string keys and values that are convertible to
+ strings.
- Args:
- cmake_vars: a dictionary with string keys and values that are convertable to
- strings.
- """
- return " ".join([_quote("{}={}".format(k, str(v)))
- for (k, v) in cmake_vars.items()])
+ Returns:
+ cmake_vars in a form suitable for passing to expand_cmake_vars.
+ """
+ return " ".join([
+ _quote("{}={}".format(k, str(v)))
+ for (k, v) in cmake_vars.items()
+ ])
def expand_cmake_vars(name, src, dst, cmake_vars):
- """Expands #cmakedefine, #cmakedefine01, and CMake variables in a text file.
-
- Args:
- name: the name of the rule
- src: the input of the rule
- dst: the output of the rule
- cmake_vars: a string containing the CMake variables, as generated by
- cmake_var_string.
- """
- expand_cmake_vars_tool = Label("@org_tensorflow//third_party/llvm:expand_cmake_vars")
- native.genrule(
- name = name,
- srcs = [src],
- tools = [expand_cmake_vars_tool],
- outs = [dst],
- cmd = ("$(location {}) ".format(expand_cmake_vars_tool) + cmake_vars +
- "< $< > $@")
- )
+ """Expands #cmakedefine, #cmakedefine01, and CMake variables in a text file.
+
+ Args:
+ name: the name of the rule
+ src: the input of the rule
+ dst: the output of the rule
+ cmake_vars: a string containing the CMake variables, as generated by
+ cmake_var_string.
+ """
+ expand_cmake_vars_tool = Label("@org_tensorflow//third_party/llvm:expand_cmake_vars")
+ native.genrule(
+ name = name,
+ srcs = [src],
+ tools = [expand_cmake_vars_tool],
+ outs = [dst],
+ cmd = ("$(location {}) ".format(expand_cmake_vars_tool) + cmake_vars +
+ "< $< > $@"),
+ )
# TODO(phawkins): the set of CMake variables was hardcoded for expediency.
# However, we should really detect many of these via configure-time tests.
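Since gentbl's docstring is reindented above without a usage example anywhere nearby, a hypothetical invocation may help; target and file names below are illustrative, not taken from the repository:

gentbl(
    name = "attributes_gen",
    tblgen = ":llvm-tblgen",
    td_file = "include/llvm/IR/Attributes.td",
    td_srcs = ["include/llvm/IR/Attributes.td"],
    tbl_outs = [("-gen-attrs", "include/llvm/IR/Attributes.inc")],
)

Each (opts, out) pair becomes its own genrule, and because library defaults to True the generated .inc files are also exposed through a cc_library as textual headers.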
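The other change in this hunk is substantive: _dict_add replaces dict concatenation with +, which newer Starlark versions disallow, by an update-based merge in which later arguments win. A hypothetical sketch of how the helpers above compose (values are illustrative):

base = {"LLVM_ENABLE_THREADS": 1}
target = llvm_target_cmake_vars("X86", "x86_64-unknown-linux_gnu")

merged = _dict_add(base, target, {"LLVM_ENABLE_THREADS": 0})
# _dict_add() is {}, _dict_add(d) is a copy of d, and on duplicate keys the
# last argument wins, so LLVM_ENABLE_THREADS ends up 0 here.

flags = cmake_var_string(merged)
# _quote() wraps each entry as a shell-safe "KEY=value" token, e.g.
# "LLVM_ENABLE_THREADS=0" "LLVM_HOST_TRIPLE=x86_64-unknown-linux_gnu" ...

expand_cmake_vars("config", "config.h.cmake", "config.h", flags)
# emits a genrule that pipes the source file through the expand_cmake_vars tool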
@@ -212,18 +252,26 @@ darwin_cmake_vars = {
# than hardcoding x86_64.
llvm_all_cmake_vars = select({
"@org_tensorflow//tensorflow:darwin": cmake_var_string(
- cmake_vars + llvm_target_cmake_vars("X86", "x86_64-apple-darwin") +
- darwin_cmake_vars),
+ _dict_add(
+ cmake_vars,
+ llvm_target_cmake_vars("X86", "x86_64-apple-darwin"),
+ darwin_cmake_vars,
+ ),
+ ),
"@org_tensorflow//tensorflow:linux_ppc64le": cmake_var_string(
- cmake_vars +
- llvm_target_cmake_vars("PowerPC", "powerpc64le-unknown-linux_gnu") +
- linux_cmake_vars,
+ _dict_add(
+ cmake_vars,
+ llvm_target_cmake_vars("PowerPC", "powerpc64le-unknown-linux_gnu"),
+ linux_cmake_vars,
+ ),
),
"//conditions:default": cmake_var_string(
- cmake_vars +
- llvm_target_cmake_vars("X86", "x86_64-unknown-linux_gnu") +
- linux_cmake_vars),
-
+ _dict_add(
+ cmake_vars,
+ llvm_target_cmake_vars("X86", "x86_64-unknown-linux_gnu"),
+ linux_cmake_vars,
+ ),
+ ),
})
llvm_linkopts = ["-ldl", "-lm", "-lpthread"]
@@ -241,7 +289,10 @@ llvm_copts = []
# Platform specific sources for libSupport.
-llvm_support_platform_specific_srcs_glob = [
- "lib/Support/Unix/*.inc",
- "lib/Support/Unix/*.h",
-]
+def llvm_support_platform_specific_srcs_glob():
+ return select({
+ "//conditions:default": native.glob([
+ "lib/Support/Unix/*.inc",
+ "lib/Support/Unix/*.h",
+ ]),
+ })
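This last hunk is what forces the call-site edit in llvm.autogenerated.BUILD above: the value is now a select(), which cannot be spliced into the list passed to glob(), and native.glob itself may only run while a BUILD file is being loaded, not at the top level of a .bzl file, hence the plain list becomes a macro the BUILD file calls. A sketch of the resulting call site (target abbreviated; the real rule lists many more sources):

cc_library(
    name = "support",
    srcs = glob([
        "lib/Support/*.cpp",
        "lib/Support/*.h",
    ]) + llvm_support_platform_specific_srcs_glob(),  # select() stays outside glob()
)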
diff --git a/third_party/ngraph/build_defs.bzl b/third_party/ngraph/build_defs.bzl
index 2c9027a6b8..8ad7515aed 100644
--- a/third_party/ngraph/build_defs.bzl
+++ b/third_party/ngraph/build_defs.bzl
@@ -1,16 +1,14 @@
def clean_dep(dep):
return str(Label(dep))
-def if_ngraph(a):
+def if_ngraph(if_true, if_false = []):
"""Shorthand for select()'ing on whether we're building with nGraph support.
Returns a select statement which evaluates to if_true if we're building
with nGraph. Otherwise, the select statement evaluates to if_false.
"""
- ret_val = select({
- clean_dep("//tensorflow:with_ngraph_support"): a,
- "//conditions:default": []
+ return select({
+ clean_dep("//tensorflow:with_ngraph_support"): if_true,
+ "//conditions:default": if_false,
})
-
- return ret_val
diff --git a/third_party/ngraph/ngraph_tf.BUILD b/third_party/ngraph/ngraph_tf.BUILD
index bbac74db0f..838f022222 100644
--- a/third_party/ngraph/ngraph_tf.BUILD
+++ b/third_party/ngraph/ngraph_tf.BUILD
@@ -26,8 +26,7 @@ cc_library(
cc_library(
name = "ngraph_tf",
- srcs =
- [
+ srcs = [
"src/ngraph_builder.h",
"src/ngraph_builder.cc",
"src/ngraph_cluster.h",
diff --git a/third_party/toolchains/BUILD b/third_party/toolchains/BUILD
index fc3183a754..ec1006fe23 100644
--- a/third_party/toolchains/BUILD
+++ b/third_party/toolchains/BUILD
@@ -17,6 +17,6 @@ platform(
remote_execution_properties = """
properties: {
name: "container-image"
- value:"docker://gcr.io/asci-toolchain/nosla-ubuntu16_04-tf@sha256:800a7b68cabef15419695c188ed33ed70adf678c2371b97b236f3ae26c38274d"
+ value:"docker://gcr.io/asci-toolchain/nosla-ubuntu16_04-tf@sha256:495a025ed5e273cfa5d53357ef93ac20500c008994e0be106c509f51555fb93c"
}""",
)