aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h
diff options
context:
space:
mode:
authorGravatar Eugene Zhulenev <ezhulenev@google.com>2019-08-19 11:44:25 -0700
committerGravatar Eugene Zhulenev <ezhulenev@google.com>2019-08-19 11:44:25 -0700
commit071311821e509d87bec609d6a3aeea9dc74cfd66 (patch)
tree686922520a522c508d6074396a0303545f1b09d5 /unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h
parentd55d392e7b1136655b4223bea8e99cb2fe0a8afd (diff)
Remove XSMM support from Tensor module
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h135
1 files changed, 0 insertions, 135 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h
index c51f3f8dd..974feb0ad 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContractionBlocking.h
@@ -67,141 +67,6 @@ class TensorContractionBlocking {
StorageIndex nc_;
};
-
-
-#if defined(EIGEN_USE_LIBXSMM)
-template <typename LhsScalar, typename RhsScalar, typename StorageIndex>
-class TensorXsmmContractionBlocking {
- public:
- TensorXsmmContractionBlocking(StorageIndex k, StorageIndex m, StorageIndex n,
- size_t max_num_threads = 1, bool transposeA = false,
- bool transposeB = false):
- k_(k), m_(m), n_(n), transposeA_(transposeA),
- transposeB_(transposeB), num_threads_(max_num_threads) {
-#ifdef EIGEN_TEST_SPECIFIC_BLOCKING_SIZES
- if (EIGEN_TEST_SPECIFIC_BLOCKING_SIZES) {
- mc_ = EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_M;
- kc_ = EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_K;
- nc_ = EIGEN_TEST_SPECIFIC_BLOCKING_SIZE_N;
- outer_m_ = EIGEN_TEST_SPECIFIC_OUTER_BLOCKING_SIZE_M;
- outer_k_ = EIGEN_TEST_SPECIFIC_OUTER_BLOCKING_SIZE_K;
- outer_n_ = EIGEN_TEST_SPECIFIC_OUTER_BLOCKING_SIZE_N;
- copyA_ = EIGEN_TEST_SPECIFIC_BLOCKING_COPY_A;
- copyB_ = EIGEN_TEST_SPECIFIC_BLOCKING_COPY_B;
- outer_m_ = outer_m_ != 0 ? outer_m_ : m;
- outer_k_ = outer_k_ != 0 ? outer_k_ : k;
- outer_n_ = outer_n_ != 0 ? outer_n_ : n;
- }
-#else
- // Defaults, possibly overridden per-platform.
- copyA_ = true;
- copyB_ = false;
-
- // If the matrix is small enough, don't do blocking, just call single xsmm
- // kernel.
- if (static_cast<double>(m)*k*n <= LIBXSMM_THRESHOLD) {
- mc_ = m; kc_ = k; nc_ = n;
- outer_m_ = m; outer_k_ = k; outer_n_ = n;
- copyA_ = false; copyB_ = false;
- } else {
- int arch = libxsmm_cpuid_x86();
-
- if (arch == LIBXSMM_X86_AVX512_CORE) {
- // skylake
- mc_ = 64; kc_ = 64; nc_ = 24;
- outer_m_ = 512; outer_k_ = 512; outer_n_ = 24*22;
- // Hack to use this kernel architecture as the other one has performance
- // issues (no hardware prefetching).
- // TODO(nishantpatil): This should be removed if the issues are fixed,
- // or this one becomes the default.
- setenv("LIBXSMM_AVX512_CLASSIC_GEMM", "1", 1);
- } else if (arch == LIBXSMM_X86_AVX2) {
- // haswell
- mc_ = 32; kc_ = 192; nc_ = 33;
- outer_m_ = 512; outer_k_ = 3*192; outer_n_ = 33*16;
- } else if (arch == LIBXSMM_X86_AVX) {
- // ivybridge
- mc_ = 32; kc_ = 192; nc_ = 48;
- outer_m_ = 512; outer_k_ = 3*192; outer_n_ = 48*11;
- } else {
- // generic kernel size, usually performing well
- mc_ = 32; kc_ = 128; nc_ = 32;
- outer_m_ = 512; outer_k_ = 512; outer_n_ = 512;
- }
-
- // Only copy if it makes the stride smaller.
- copyA_ = copyA_ && (m > mc_);
- copyB_ = copyB_ && (k > kc_);
- }
-
- // We need to copy anyway if transposing
- copyA_ = copyA_ || transposeA;
- copyB_ = copyB_ || transposeB;
-
- // See libxsmm_gemm_prefetch_type definition in libxsmm_typedefs.h
- prefetch_ = LIBXSMM_PREFETCH_AL2CL2BL2_VIA_C;
-
-#endif
-
- mc_ = mc_ > m ? m : mc_;
- nc_ = nc_ > n ? n : nc_;
- kc_ = kc_ > k ? k : kc_;
-
- size_t compute_parallelism = (m / mc_) * (n / nc_);
- size_t pack_parallelism = 0;
- if (copyA_) {
- pack_parallelism += (m / mc_) * (k / kc_);
- }
- if (copyB_) {
- pack_parallelism += (n / nc_) * (k / kc_);
- }
- size_t parallelism = numext::maxi(compute_parallelism, pack_parallelism);
-
- num_threads_ = numext::mini<size_t>(num_threads_,
- parallelism / MIN_JOBS_PER_THREAD);
- num_threads_ = numext::maxi<size_t>(num_threads_, 1);
-
- // For optimal performance outer block sizes should be multiplies of kernel
- // sizes, or bigger than matrix size (=no outer blocking).
- eigen_assert(outer_m_ % mc_ == 0 || outer_m_ >= m);
- eigen_assert(outer_k_ % kc_ == 0 || outer_k_ >= k);
- eigen_assert(outer_n_ % nc_ == 0 || outer_n_ >= n);
- }
-
- EIGEN_ALWAYS_INLINE StorageIndex kc() const { return kc_; }
- EIGEN_ALWAYS_INLINE StorageIndex mc() const { return mc_; }
- EIGEN_ALWAYS_INLINE StorageIndex nc() const { return nc_; }
- EIGEN_ALWAYS_INLINE StorageIndex outer_k() const { return outer_k_; }
- EIGEN_ALWAYS_INLINE StorageIndex outer_m() const { return outer_m_; }
- EIGEN_ALWAYS_INLINE StorageIndex outer_n() const { return outer_n_; }
- EIGEN_ALWAYS_INLINE bool copyA() const { return copyA_; }
- EIGEN_ALWAYS_INLINE bool copyB() const { return copyB_; }
- EIGEN_ALWAYS_INLINE bool transposeA() const { return transposeA_; }
- EIGEN_ALWAYS_INLINE bool transposeB() const { return transposeB_; }
- EIGEN_ALWAYS_INLINE int num_threads() const { return num_threads_; }
- EIGEN_ALWAYS_INLINE StorageIndex blocks_m() const { return divup(m_, mc_); }
- EIGEN_ALWAYS_INLINE StorageIndex blocks_k() const { return divup(k_, kc_); }
- EIGEN_ALWAYS_INLINE StorageIndex blocks_n() const { return divup(n_, nc_); }
- EIGEN_ALWAYS_INLINE libxsmm_gemm_prefetch_type prefetch() const {
- return prefetch_;
- }
-
- private:
- StorageIndex k_, m_, n_;
- StorageIndex kc_, mc_, nc_;
- StorageIndex outer_k_, outer_m_, outer_n_;
- bool copyA_, copyB_, transposeA_, transposeB_;
- size_t num_threads_;
-
- // Threshold for m*k*n to skip blocking and just call libxsmm
- const double LIBXSMM_THRESHOLD = 80*80*80;
- // For computing optimal number of threads - so that each thread gets at least
- // that many jobs.
- const double MIN_JOBS_PER_THREAD = 3;
- libxsmm_gemm_prefetch_type prefetch_;
-};
-#endif // EIGEN_USE_LIBXSMM
-
} // end namespace internal
} // end namespace Eigen