From b3e7c9132d41da75c0a6af783300cb11101010db Mon Sep 17 00:00:00 2001 From: Paul Tucker Date: Mon, 16 Jul 2018 17:26:05 -0700 Subject: Add optional Allocator argument to ThreadPoolDevice constructor. When supplied, this allocator will be used in place of internal::aligned_malloc. This permits e.g. use of a NUMA-node specific allocator where the thread-pool is also restricted a single NUMA-node. --- .../Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index 90fd99027..be397e1b6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -95,14 +95,20 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) { // Build a thread pool device on top the an existing pool of threads. struct ThreadPoolDevice { // The ownership of the thread pool remains with the caller. - ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores) : pool_(pool), num_threads_(num_cores) { } + ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores) + : pool_(pool), num_threads_(num_cores), allocator_(nullptr) { } EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { - return internal::aligned_malloc(num_bytes); + return allocator_ ? allocator_->allocate(num_bytes) + : internal::aligned_malloc(num_bytes); } EIGEN_STRONG_INLINE void deallocate(void* buffer) const { - internal::aligned_free(buffer); + if (allocator_) { + allocator_->deallocate(buffer); + } else { + internal::aligned_free(buffer); + } } EIGEN_STRONG_INLINE void memcpy(void* dst, const void* src, size_t n) const { @@ -267,9 +273,13 @@ struct ThreadPoolDevice { // Thread pool accessor. ThreadPoolInterface* getPool() const { return pool_; } + // Allocator accessor. + Allocator* getAllocator() const { return allocator_; } + private: ThreadPoolInterface* pool_; int num_threads_; + Allocator* allocator_; }; -- cgit v1.2.3 From 4e9848fa8600be69dfb51405606eafa1dba8d0bf Mon Sep 17 00:00:00 2001 From: Paul Tucker Date: Mon, 16 Jul 2018 17:53:36 -0700 Subject: Actually add optional Allocator* arg to ThreadPoolDevice(). --- unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index be397e1b6..c9534d400 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -95,8 +95,8 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) { // Build a thread pool device on top the an existing pool of threads. struct ThreadPoolDevice { // The ownership of the thread pool remains with the caller. - ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores) - : pool_(pool), num_threads_(num_cores), allocator_(nullptr) { } + ThreadPoolDevice(ThreadPoolInterface* pool, int num_cores, Allocator* allocator = nullptr) + : pool_(pool), num_threads_(num_cores), allocator_(allocator) { } EIGEN_STRONG_INLINE void* allocate(size_t num_bytes) const { return allocator_ ? allocator_->allocate(num_bytes) -- cgit v1.2.3 From d4afccde5a9553ddfb48b0f5fad0115cd8bf791a Mon Sep 17 00:00:00 2001 From: Paul Tucker Date: Thu, 19 Jul 2018 17:43:44 -0700 Subject: Add test coverage for ThreadPoolDevice optional allocator. --- .../CXX11/src/Tensor/TensorDeviceThreadPool.h | 7 ++++ unsupported/test/cxx11_tensor_thread_pool.cpp | 45 ++++++++++++++++++++-- 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index c9534d400..f4123b71d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -91,6 +91,13 @@ static EIGEN_STRONG_INLINE void wait_until_ready(SyncType* n) { } } +// An abstract interface to a device specific memory allocator. +class Allocator { + public: + virtual ~Allocator() {} + EIGEN_DEVICE_FUNC virtual void* allocate(size_t num_bytes) const = 0; + EIGEN_DEVICE_FUNC virtual void deallocate(void* buffer) const = 0; +}; // Build a thread pool device on top the an existing pool of threads. struct ThreadPoolDevice { diff --git a/unsupported/test/cxx11_tensor_thread_pool.cpp b/unsupported/test/cxx11_tensor_thread_pool.cpp index 2ef665f30..200664740 100644 --- a/unsupported/test/cxx11_tensor_thread_pool.cpp +++ b/unsupported/test/cxx11_tensor_thread_pool.cpp @@ -16,6 +16,25 @@ using Eigen::Tensor; +class TestAllocator : public Allocator { + public: + ~TestAllocator() override {} + EIGEN_DEVICE_FUNC void* allocate(size_t num_bytes) const override { + const_cast(this)->alloc_count_++; + return internal::aligned_malloc(num_bytes); + } + EIGEN_DEVICE_FUNC void deallocate(void* buffer) const override { + const_cast(this)->dealloc_count_++; + internal::aligned_free(buffer); + } + + int alloc_count() const { return alloc_count_; } + int dealloc_count() const { return dealloc_count_; } + + private: + int alloc_count_ = 0; + int dealloc_count_ = 0; +}; void test_multithread_elementwise() { @@ -320,14 +339,14 @@ void test_multithread_random() } template -void test_multithread_shuffle() +void test_multithread_shuffle(Allocator* allocator) { Tensor tensor(17,5,7,11); tensor.setRandom(); const int num_threads = internal::random(2, 11); ThreadPool threads(num_threads); - Eigen::ThreadPoolDevice device(&threads, num_threads); + Eigen::ThreadPoolDevice device(&threads, num_threads, allocator); Tensor shuffle(7,5,11,17); array shuffles = {{2,1,3,0}}; @@ -344,6 +363,21 @@ void test_multithread_shuffle() } } +void test_threadpool_allocate(TestAllocator* allocator) +{ + const int num_threads = internal::random(2, 11); + const int num_allocs = internal::random(2, 11); + ThreadPool threads(num_threads); + Eigen::ThreadPoolDevice device(&threads, num_threads, allocator); + + for (int a = 0; a < num_allocs; ++a) { + void* ptr = device.allocate(512); + device.deallocate(ptr); + } + VERIFY(allocator != nullptr); + VERIFY_IS_EQUAL(allocator->alloc_count(), num_allocs); + VERIFY_IS_EQUAL(allocator->dealloc_count(), num_allocs); +} void test_cxx11_tensor_thread_pool() { @@ -368,6 +402,9 @@ void test_cxx11_tensor_thread_pool() CALL_SUBTEST_6(test_memcpy()); CALL_SUBTEST_6(test_multithread_random()); - CALL_SUBTEST_6(test_multithread_shuffle()); - CALL_SUBTEST_6(test_multithread_shuffle()); + + TestAllocator test_allocator; + CALL_SUBTEST_6(test_multithread_shuffle(nullptr)); + CALL_SUBTEST_6(test_multithread_shuffle(&test_allocator)); + CALL_SUBTEST_6(test_threadpool_allocate(&test_allocator)); } -- cgit v1.2.3 From 385f7b8d0ca926d00c71987ab308202511e5c753 Mon Sep 17 00:00:00 2001 From: Paul Tucker Date: Tue, 31 Jul 2018 13:52:18 -0700 Subject: Change getAllocator() to allocator() in ThreadPoolDevice. --- unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index f4123b71d..f8188ffde 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -281,7 +281,7 @@ struct ThreadPoolDevice { ThreadPoolInterface* getPool() const { return pool_; } // Allocator accessor. - Allocator* getAllocator() const { return allocator_; } + Allocator* allocator() const { return allocator_; } private: ThreadPoolInterface* pool_; -- cgit v1.2.3 From 62169419aba9048c6c66c71ce4910241e594cbe1 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 1 Aug 2018 23:35:34 +0200 Subject: Fix two regressions introduced in previous merges: bad usage of EIGEN_HAS_VARIADIC_TEMPLATES and linking issue. --- Eigen/src/Core/util/Macros.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index f59b93608..0f3b428c9 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -1076,11 +1076,13 @@ namespace Eigen { # endif #endif -#ifdef EIGEN_HAS_VARIADIC_TEMPLATES +#if EIGEN_HAS_VARIADIC_TEMPLATES // The all function is used to enable a variadic version of eigen_assert which can take a parameter pack as its input. namespace Eigen { namespace internal { -bool all(){ return true; } + +inline bool all(){ return true; } + template bool all(T t, Ts ... ts){ return t && all(ts...); } -- cgit v1.2.3 From 40d6d020a098d9f828bad5e2a1accfc5ee1a1289 Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Thu, 2 Aug 2018 13:34:53 +0100 Subject: Fixing ambigous constructor error for Clang compiler. --- unsupported/test/cxx11_tensor_concatenation.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/unsupported/test/cxx11_tensor_concatenation.cpp b/unsupported/test/cxx11_tensor_concatenation.cpp index 9189a609b..f53515b4e 100644 --- a/unsupported/test/cxx11_tensor_concatenation.cpp +++ b/unsupported/test/cxx11_tensor_concatenation.cpp @@ -50,7 +50,13 @@ static void test_static_dimension_failure() .reshape(Tensor::Dimensions(2, 3, 1)) .concatenate(right, 0); Tensor alternative = left - .concatenate(right.reshape(Tensor::Dimensions{{{2, 3}}}), 0); + // Clang compiler break with {{{}}} with an ambigous error on copy constructor + // the variadic DSize constructor added for #ifndef EIGEN_EMULATE_CXX11_META_H. + // Solution: + // either the code should change to + // Tensor::Dimensions{{2, 3}} + // or Tensor::Dimensions{Tensor::Dimensions{{2, 3}}} + .concatenate(right.reshape(Tensor::Dimensions{{2, 3}}), 0); } template -- cgit v1.2.3 From 516d2621b96a0c41ff999781b8bbcaa527a7c6ee Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Thu, 2 Aug 2018 14:30:48 +0100 Subject: fixing compilation error for cxx11_tensor_trace.cpp error on Microsoft Visual Studio. --- unsupported/test/cxx11_tensor_trace.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/unsupported/test/cxx11_tensor_trace.cpp b/unsupported/test/cxx11_tensor_trace.cpp index 1579bc1eb..0cb23060e 100644 --- a/unsupported/test/cxx11_tensor_trace.cpp +++ b/unsupported/test/cxx11_tensor_trace.cpp @@ -37,7 +37,7 @@ static void test_all_dimensions_trace() { VERIFY_IS_EQUAL(result1(), sum); Tensor tensor2(7, 7, 7, 7, 7); - array dims({{2, 1, 0, 3, 4}}); + array dims = { { 2, 1, 0, 3, 4 } }; Tensor result2 = tensor2.trace(dims); VERIFY_IS_EQUAL(result2.rank(), 0); sum = 0.0f; @@ -52,7 +52,7 @@ template static void test_simple_trace() { Tensor tensor1(3, 5, 3); tensor1.setRandom(); - array dims1({{0, 2}}); + array dims1 = { { 0, 2 } }; Tensor result1 = tensor1.trace(dims1); VERIFY_IS_EQUAL(result1.rank(), 1); VERIFY_IS_EQUAL(result1.dimension(0), 5); @@ -67,7 +67,7 @@ static void test_simple_trace() { Tensor tensor2(5, 5, 7, 7); tensor2.setRandom(); - array dims2({{2, 3}}); + array dims2 = { { 2, 3 } }; Tensor result2 = tensor2.trace(dims2); VERIFY_IS_EQUAL(result2.rank(), 2); VERIFY_IS_EQUAL(result2.dimension(0), 5); @@ -82,7 +82,7 @@ static void test_simple_trace() { } } - array dims3({{1, 0}}); + array dims3 = { { 1, 0 } }; Tensor result3 = tensor2.trace(dims3); VERIFY_IS_EQUAL(result3.rank(), 2); VERIFY_IS_EQUAL(result3.dimension(0), 7); @@ -99,7 +99,7 @@ static void test_simple_trace() { Tensor tensor3(3, 7, 3, 7, 3); tensor3.setRandom(); - array dims4({{0, 2, 4}}); + array dims4 = { { 0, 2, 4 } }; Tensor result4 = tensor3.trace(dims4); VERIFY_IS_EQUAL(result4.rank(), 2); VERIFY_IS_EQUAL(result4.dimension(0), 7); @@ -116,7 +116,7 @@ static void test_simple_trace() { Tensor tensor4(3, 7, 4, 7, 5); tensor4.setRandom(); - array dims5({{1, 3}}); + array dims5 = { { 1, 3 } }; Tensor result5 = tensor4.trace(dims5); VERIFY_IS_EQUAL(result5.rank(), 3); VERIFY_IS_EQUAL(result5.dimension(0), 3); @@ -140,7 +140,7 @@ template static void test_trace_in_expr() { Tensor tensor(2, 3, 5, 3); tensor.setRandom(); - array dims({{1, 3}}); + array dims = { { 1, 3 } }; Tensor result(2, 5); result = result.constant(1.0f) - tensor.trace(dims); VERIFY_IS_EQUAL(result.rank(), 2); @@ -168,4 +168,4 @@ EIGEN_DECLARE_TEST(cxx11_tensor_trace) { CALL_SUBTEST(test_simple_trace()); CALL_SUBTEST(test_trace_in_expr()); CALL_SUBTEST(test_trace_in_expr()); -} +} \ No newline at end of file -- cgit v1.2.3 From 2bf1cc8cf72396c8c0c8103a5e941121534cf858 Mon Sep 17 00:00:00 2001 From: Gustavo Lima Chaves Date: Thu, 2 Aug 2018 15:55:36 -0700 Subject: Fix 256 bit packet size assumptions in unit tests. Like in change 2606abed535744fcaa41b923c71338a06b8ed3fa , we have hit the threshould again. With AVX512 builds we would never have Vector8f packets aligned at 64 bytes (the new value of EIGEN_MAX_ALIGN_BYTES after change 405859f18dac56f324e1d93ca8721d5f7fd22c62 , for AVX512-enabled builds). This makes test/dynalloc.cpp pass for those builds. --- test/dynalloc.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/dynalloc.cpp b/test/dynalloc.cpp index ceecd76e3..1c74866ba 100644 --- a/test/dynalloc.cpp +++ b/test/dynalloc.cpp @@ -15,6 +15,7 @@ #define ALIGNMENT 1 #endif +typedef Matrix Vector16f; typedef Matrix Vector8f; void check_handmade_aligned_malloc() @@ -70,7 +71,7 @@ struct MyStruct { EIGEN_MAKE_ALIGNED_OPERATOR_NEW char dummychar; - Vector8f avec; + Vector16f avec; }; class MyClassA @@ -78,7 +79,7 @@ class MyClassA public: EIGEN_MAKE_ALIGNED_OPERATOR_NEW char dummychar; - Vector8f avec; + Vector16f avec; }; template void check_dynaligned() @@ -145,6 +146,7 @@ EIGEN_DECLARE_TEST(dynalloc) CALL_SUBTEST(check_dynaligned() ); CALL_SUBTEST(check_dynaligned() ); CALL_SUBTEST(check_dynaligned() ); + CALL_SUBTEST(check_dynaligned() ); } { -- cgit v1.2.3 From 01358300d581f94c842bcdf35266b50f2795ded3 Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Fri, 3 Aug 2018 16:59:15 +0100 Subject: Creating separate SYCL required PR for uncontroversial files. --- Eigen/Core | 6 ++++ Eigen/src/Core/arch/GPU/Half.h | 7 ++++- unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h | 4 +-- unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h | 4 +++ .../src/SpecialFunctions/SpecialFunctionsImpl.h | 36 +++++++++++++++++++--- 5 files changed, 50 insertions(+), 7 deletions(-) diff --git a/Eigen/Core b/Eigen/Core index 864bde551..080511fc9 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -200,6 +200,12 @@ using std::ptrdiff_t; #include "src/Core/arch/GPU/MathFunctions.h" #endif +#if defined EIGEN_VECTORIZE_SYCL + #include "src/Core/arch/SYCL/InteropHeaders.h" + #include "src/Core/arch/SYCL/PacketMath.h" + #include "src/Core/arch/SYCL/MathFunctions.h" + #include "src/Core/arch/SYCL/TypeCasting.h" +#endif #include "src/Core/arch/Default/Settings.h" #include "src/Core/functors/TernaryFunctors.h" diff --git a/Eigen/src/Core/arch/GPU/Half.h b/Eigen/src/Core/arch/GPU/Half.h index ab9d27591..aca56fa72 100644 --- a/Eigen/src/Core/arch/GPU/Half.h +++ b/Eigen/src/Core/arch/GPU/Half.h @@ -83,7 +83,11 @@ struct __half_raw { #if defined(EIGEN_CUDACC_VER) && EIGEN_CUDACC_VER < 90000 // In CUDA < 9.0, __half is the equivalent of CUDA 9's __half_raw typedef __half __half_raw; - #endif + #endif // defined(EIGEN_HAS_CUDA_FP16) + +#elif defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__) +typedef cl::sycl::half __half_raw; + #endif EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half_raw raw_uint16_to_half(unsigned short x); @@ -200,6 +204,7 @@ struct half : public half_impl::half_base { x = other.x; return *this; } + }; } // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h index c8b2fad1e..ea53bb04b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTrace.h @@ -273,11 +273,11 @@ struct TensorEvaluator, Device> Dimensions m_dimensions; TensorEvaluator m_impl; + // Initialize the size of the trace dimension + Index m_traceDim; const Device& m_device; array m_reduced; array m_reducedDims; - // Initialize the size of the trace dimension - Index m_traceDim; array m_outputStrides; array m_reducedStrides; array m_preservedStrides; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h index 006b37921..0a394c88d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h @@ -59,6 +59,7 @@ struct traits > template struct MakePointer { typedef T* Type; typedef T& RefType; + typedef T ScalarType; }; typedef typename MakePointer::Type PointerType; @@ -80,6 +81,7 @@ struct traits > template struct MakePointer { typedef T* Type; typedef T& RefType; + typedef T ScalarType; }; typedef typename MakePointer::Type PointerType; @@ -105,6 +107,8 @@ struct traits > typedef MakePointer_ MakePointerT; typedef typename MakePointerT::Type Type; typedef typename MakePointerT::RefType RefType; + typedef typename MakePointerT::ScalarType ScalarType; + }; typedef typename MakePointer::Type PointerType; diff --git a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h index dbcc9d8ac..5784cbc86 100644 --- a/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h +++ b/unsupported/Eigen/src/SpecialFunctions/SpecialFunctionsImpl.h @@ -193,6 +193,8 @@ struct lgamma_impl { #if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__) int dummy; return ::lgammaf_r(x, &dummy); +#elif defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__) + return cl::sycl::lgamma(x); #else return ::lgammaf(x); #endif @@ -206,6 +208,8 @@ struct lgamma_impl { #if !defined(EIGEN_GPU_COMPILE_PHASE) && (defined(_BSD_SOURCE) || defined(_SVID_SOURCE)) && !defined(__APPLE__) int dummy; return ::lgamma_r(x, &dummy); +#elif defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__) + return cl::sycl::lgamma(x); #else return ::lgamma(x); #endif @@ -423,13 +427,25 @@ struct erf_retval { template <> struct erf_impl { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE float run(float x) { return ::erff(x); } + static EIGEN_STRONG_INLINE float run(float x) { +#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__) + return cl::sycl::erf(x); +#else + return ::erff(x); +#endif + } }; template <> struct erf_impl { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double run(double x) { return ::erf(x); } + static EIGEN_STRONG_INLINE double run(double x) { +#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__) + return cl::sycl::erf(x); +#else + return ::erf(x); +#endif + } }; #endif // EIGEN_HAS_C99_MATH @@ -456,13 +472,25 @@ struct erfc_retval { template <> struct erfc_impl { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE float run(const float x) { return ::erfcf(x); } + static EIGEN_STRONG_INLINE float run(const float x) { +#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__) + return cl::sycl::erfc(x); +#else + return ::erfcf(x); +#endif + } }; template <> struct erfc_impl { EIGEN_DEVICE_FUNC - static EIGEN_STRONG_INLINE double run(const double x) { return ::erfc(x); } + static EIGEN_STRONG_INLINE double run(const double x) { +#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__) + return cl::sycl::erfc(x); +#else + return ::erfc(x); +#endif + } }; #endif // EIGEN_HAS_C99_MATH -- cgit v1.2.3 From 3074b1ff9e61a14384accdbda7c8b3b520140360 Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Fri, 3 Aug 2018 17:13:44 +0100 Subject: Fixing the compilation error. --- unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h | 1 + 1 file changed, 1 insertion(+) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index 8ed1796df..ec1dc0fab 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -21,6 +21,7 @@ namespace Eigen { template struct MakePointer { typedef T* Type; typedef T& RefType; + typedef T ScalarType; }; namespace internal{ -- cgit v1.2.3 From bcb29f890ccdbf4922780ed5da0e23db65d7ae64 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Fri, 3 Aug 2018 10:18:53 -0700 Subject: Fix initialization order. --- unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h index e1649fb47..e604456e8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorContraction.h @@ -274,8 +274,8 @@ struct TensorContractionEvaluatorBase op.lhsExpression(), op.rhsExpression()), device), m_rightImpl(choose(Cond(Layout) == static_cast(ColMajor)>(), op.rhsExpression(), op.lhsExpression()), device), - m_output_kernel(op.outputKernel()), m_device(device), + m_output_kernel(op.outputKernel()), m_result(NULL) { EIGEN_STATIC_ASSERT((static_cast(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), -- cgit v1.2.3 From 7f8b53fd0e359aa2b1a588489f31135a8c52b55f Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Wed, 1 Aug 2018 12:36:24 -0700 Subject: bug #1580: Fix cuda clang build. STL is not supported, so std::equal_to and std::not_equal breaks compilation. Update the definition of EIGEN_CONSTEXPR_ARE_DEVICE_FUNC to exclude clang. See also PR 450. --- Eigen/src/Core/util/Macros.h | 20 +++++++++++--------- Eigen/src/Core/util/Meta.h | 2 +- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 0f3b428c9..9a2f5ab05 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -571,20 +571,19 @@ // Does the compiler fully support const expressions? (as in c++14) #ifndef EIGEN_HAS_CONSTEXPR - #if defined(EIGEN_CUDACC) // Const expressions are supported provided that c++11 is enabled and we're using either clang or nvcc 7.5 or above - #if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_CUDACC_VER >= 70500)) - #define EIGEN_HAS_CONSTEXPR 1 - #endif + #if EIGEN_MAX_CPP_VER>=14 && (__cplusplus > 199711L && (EIGEN_COMP_CLANG || EIGEN_CUDACC_VER >= 70500)) + #define EIGEN_HAS_CONSTEXPR 1 + #endif #elif EIGEN_MAX_CPP_VER>=14 && (__has_feature(cxx_relaxed_constexpr) || (defined(__cplusplus) && __cplusplus >= 201402L) || \ (EIGEN_GNUC_AT_LEAST(4,8) && (__cplusplus > 199711L)) || \ (EIGEN_COMP_CLANG >= 306 && (__cplusplus > 199711L))) - #define EIGEN_HAS_CONSTEXPR 1 + #define EIGEN_HAS_CONSTEXPR 1 #endif #ifndef EIGEN_HAS_CONSTEXPR - #define EIGEN_HAS_CONSTEXPR 0 + #define EIGEN_HAS_CONSTEXPR 0 #endif #endif // EIGEN_HAS_CONSTEXPR @@ -643,9 +642,12 @@ #ifdef __CUDACC_RELAXED_CONSTEXPR__ #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC #endif - #elif defined(__clang__) && defined(__CUDA__) - // clang++ always considers constexpr functions as implicitly __host__ __device__ - #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC + // See bug 1580: clang/CUDA fails to make the following calls + // to constexpr bool std::equal_to::operator() even when + // EIGEN_CONSTEXPR_ARE_DEVICE_FUNC is defined in c++14 only. + // #elif defined(__clang__) && defined(__CUDA__) && EIGEN_HAS_CONSTEXPR == 1 + // // clang++ always considers constexpr functions as implicitly __host__ __device__ + // #define EIGEN_CONSTEXPR_ARE_DEVICE_FUNC #endif #endif diff --git a/Eigen/src/Core/util/Meta.h b/Eigen/src/Core/util/Meta.h index 658cfa9eb..f27b8e85d 100755 --- a/Eigen/src/Core/util/Meta.h +++ b/Eigen/src/Core/util/Meta.h @@ -569,7 +569,7 @@ template struct scalar_product_traits } // end namespace internal namespace numext { - + #if defined(EIGEN_GPU_COMPILE_PHASE) template EIGEN_DEVICE_FUNC void swap(T &a, T &b) { T tmp = b; b = a; a = tmp; } #else -- cgit v1.2.3 From 09c81ac03335584674371369d0045510dc83aba4 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sat, 4 Aug 2018 00:17:37 +0200 Subject: bug #1451: fix numeric_limits> with a reference as derivative type --- unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h index 279fe5cd3..13d959df4 100755 --- a/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h +++ b/unsupported/Eigen/src/AutoDiff/AutoDiffScalar.h @@ -684,10 +684,15 @@ template struct NumTraits > } namespace std { + template class numeric_limits > : public numeric_limits {}; +template +class numeric_limits > + : public numeric_limits {}; + } // namespace std #endif // EIGEN_AUTODIFF_SCALAR_H -- cgit v1.2.3 From 36e7e7dd8fc995c8a7cb8caa924663a56bc4cc3a Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Mon, 6 Aug 2018 13:16:32 -0700 Subject: Forward declare NoOpOutputKernel as struct rather than class to be consistent with implementation. --- unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index ec1dc0fab..0dd524a30 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -98,7 +98,7 @@ template class TensorForcedEvalOp; template class TensorDevice; template struct TensorEvaluator; -class NoOpOutputKernel; +struct NoOpOutputKernel; struct DefaultDevice; struct ThreadPoolDevice; -- cgit v1.2.3 From d011d05fd6306de11fbc74c5b4fdaa929f2fe3ac Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 6 Aug 2018 13:40:51 -0700 Subject: Fixed compilation errors. --- unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 24 +++++++++++----------- .../Eigen/CXX11/src/Tensor/TensorConvolution.h | 4 ++-- .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 4 ++-- .../Eigen/CXX11/src/Tensor/TensorForcedEval.h | 4 ++-- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 97f90f638..ab3731952 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -538,8 +538,8 @@ class TensorBase // Fourier transforms template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorFFTOp - fft(const FFT& fft) const { - return TensorFFTOp(derived(), fft); + fft(const FFT& dims) const { + return TensorFFTOp(derived(), dims); } // Scan. @@ -723,8 +723,8 @@ class TensorBase template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorBroadcastingOp - broadcast(const Broadcast& broadcast) const { - return TensorBroadcastingOp(derived(), broadcast); + broadcast(const Broadcast& bcast) const { + return TensorBroadcastingOp(derived(), bcast); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -832,8 +832,8 @@ class TensorBase } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorShufflingOp - shuffle(const Shuffle& shuffle) const { - return TensorShufflingOp(derived(), shuffle); + shuffle(const Shuffle& shfl) const { + return TensorShufflingOp(derived(), shfl); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorStridingOp @@ -1030,13 +1030,13 @@ class TensorBase : public TensorBase { template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorShufflingOp - shuffle(const Shuffle& shuffle) const { - return TensorShufflingOp(derived(), shuffle); + shuffle(const Shuffle& shfl) const { + return TensorShufflingOp(derived(), shfl); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp - shuffle(const Shuffle& shuffle) { - return TensorShufflingOp(derived(), shuffle); + shuffle(const Shuffle& shfl) { + return TensorShufflingOp(derived(), shfl); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -1052,8 +1052,8 @@ class TensorBase : public TensorBase { // Select the device on which to evaluate the expression. template - TensorDevice device(const DeviceType& device) { - return TensorDevice(device, derived()); + TensorDevice device(const DeviceType& dev) { + return TensorDevice(dev, derived()); } protected: diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index 0d3ca966c..e3e650fa5 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -527,8 +527,8 @@ struct TensorEvaluator EvalTo; EvalTo evalToTmp(local, m_kernelArg); - const bool PacketAccess = internal::IsVectorizable::value; - internal::TensorExecutor::run(evalToTmp, m_device); + const bool Vectorize = internal::IsVectorizable::value; + internal::TensorExecutor::run(evalToTmp, m_device); m_kernel = local; m_local_kernel = true; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index 8f7a81575..028902fea 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -126,7 +126,7 @@ struct TensorEvaluator } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector* resources) const {} + std::vector*) const {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const { assert(m_data != NULL); @@ -255,7 +255,7 @@ struct TensorEvaluator } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void getResourceRequirements( - std::vector* resources) const {} + std::vector*) const {} EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void block(TensorBlock* block) const { assert(m_data != NULL); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index a456f308b..2778bf5ec 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -124,8 +124,8 @@ struct TensorEvaluator, Device> } typedef TensorEvalToOp< const typename internal::remove_const::type > EvalTo; EvalTo evalToTmp(m_buffer, m_op); - const bool PacketAccess = internal::IsVectorizable::value; - internal::TensorExecutor::type, PacketAccess>::run(evalToTmp, m_device); + const bool Vectorize = internal::IsVectorizable::value; + internal::TensorExecutor::type, Vectorize>::run(evalToTmp, m_device); return true; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { -- cgit v1.2.3 From 10d286f55b30bfcd45be3e83c44edde5ac184270 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 6 Aug 2018 16:00:29 -0700 Subject: Silenced a couple of compilation warnings. --- unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h | 2 +- unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h index 84cf6d216..dc8b5ae6d 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h @@ -747,7 +747,7 @@ class TensorBlockMapper { // block dimension sizes based on "square" dimension size target. const size_t dim_size_target = static_cast( std::pow(static_cast(min_target_size), - 1.0 / static_cast(block_dim_sizes.rank()))); + 1.0f / static_cast(block_dim_sizes.rank()))); for (size_t i = 0; i < block_dim_sizes.rank(); ++i) { // TODO(andydavis) Adjust the inner most 'block_dim_size' to make it // a multiple of the packet size. Note that reducing diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h index 0fc49255d..e25dd9cf8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorShuffling.h @@ -61,8 +61,8 @@ class TensorShufflingOp : public TensorBase typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shuffle) - : m_xpr(expr), m_shuffle(shuffle) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorShufflingOp(const XprType& expr, const Shuffle& shfl) + : m_xpr(expr), m_shuffle(shfl) {} EIGEN_DEVICE_FUNC const Shuffle& shufflePermutation() const { return m_shuffle; } -- cgit v1.2.3 From 693fb1d41e1b267ae149b8f368fc3008bd59aab9 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Tue, 7 Aug 2018 17:18:51 -0700 Subject: Fix init order. --- unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h index e3e650fa5..a07e32db0 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolution.h @@ -786,7 +786,7 @@ struct TensorEvaluator(TensorEvaluator::Layout) == static_cast(TensorEvaluator::Layout)), YOU_MADE_A_PROGRAMMING_MISTAKE); -- cgit v1.2.3 From 908b906d79a30b357f82e9c8a1e0d238e2f3469a Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Wed, 8 Aug 2018 10:01:10 +0100 Subject: Disabling assert inside SYCL kernel. --- test/main.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/main.h b/test/main.h index 5d64bc736..de8a4865f 100644 --- a/test/main.h +++ b/test/main.h @@ -193,7 +193,7 @@ namespace Eigen #define EIGEN_DEFAULT_IO_FORMAT IOFormat(4, 0, " ", "\n", "", "", "", "") -#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) +#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(__HIP_DEVICE_COMPILE__) && !defined(__SYCL_DEVICE_ONLY__) #define EIGEN_EXCEPTIONS #endif @@ -272,7 +272,7 @@ namespace Eigen } #endif //EIGEN_EXCEPTIONS - #elif !defined(__CUDACC__) && !defined(__HIPCC__)// EIGEN_DEBUG_ASSERTS + #elif !defined(__CUDACC__) && !defined(__HIPCC__) && !defined(__SYCL_DEVICE_ONLY__) // EIGEN_DEBUG_ASSERTS // see bug 89. The copy_bool here is working around a bug in gcc <= 4.3 #define eigen_assert(a) \ if( (!Eigen::internal::copy_bool(a)) && (!no_more_assert) )\ @@ -329,7 +329,7 @@ namespace Eigen std::cout << "Can't VERIFY_RAISES_STATIC_ASSERT( " #a " ) with exceptions disabled\n"; #endif - #if !defined(__CUDACC__) && !defined(__HIPCC__) + #if !defined(__CUDACC__) && !defined(__HIPCC__) && !defined(__SYCL_DEVICE_ONLY__) #define EIGEN_USE_CUSTOM_ASSERT #endif @@ -845,4 +845,4 @@ int main(int argc, char *argv[]) #ifdef _MSC_VER // 4503 - decorated name length exceeded, name was truncated #pragma warning( disable : 4503) -#endif +#endif \ No newline at end of file -- cgit v1.2.3 From 22031ab59a5b9c7b2612feaa12abe7bcef56a8e2 Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Wed, 8 Aug 2018 11:07:27 +0100 Subject: Adding EIGEN_UNROLL_LOOP macro. --- Eigen/src/Core/util/Macros.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 9a2f5ab05..81ca6b8fe 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -1092,5 +1092,15 @@ bool all(T t, Ts ... ts){ return t && all(ts...); } } #endif +// Wrapping #pragma unroll in a macro since it is required for SYCL +#if defined(__SYCL_DEVICE_ONLY__) + #if defined(_MSC_VER) + #define EIGEN_UNROLL_LOOP __Pragma(unroll) + #else + #define EIGEN_UNROLL_LOOP _Pragma("unroll") + #endif +#else + #define EIGEN_UNROLL_LOOP +#endif #endif // EIGEN_MACROS_H -- cgit v1.2.3 From 67711eaa310b1f1db67fc866cc64f703faa4f788 Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Wed, 8 Aug 2018 11:38:10 +0100 Subject: Fixing typo. --- Eigen/src/Core/util/Macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 81ca6b8fe..bcdede61e 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -1095,7 +1095,7 @@ bool all(T t, Ts ... ts){ return t && all(ts...); } // Wrapping #pragma unroll in a macro since it is required for SYCL #if defined(__SYCL_DEVICE_ONLY__) #if defined(_MSC_VER) - #define EIGEN_UNROLL_LOOP __Pragma(unroll) + #define EIGEN_UNROLL_LOOP __pragma(unroll) #else #define EIGEN_UNROLL_LOOP _Pragma("unroll") #endif -- cgit v1.2.3 From 532a0be05c0bc0f9a199f97122c88967494dd4ba Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Wed, 8 Aug 2018 12:12:26 +0100 Subject: Fixing compiler warning in TensorBlock.h as it was creating a lot of noise at compilation. --- unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h index dc8b5ae6d..45ddfdb39 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBlock.h @@ -89,7 +89,7 @@ EIGEN_STRONG_INLINE void MergeResourceRequirements( // policy if block shapes/sizes conflict). *block_shape = resources[0].block_shape; *block_total_size = resources[0].block_total_size; - for (int i = 1; i < resources.size(); ++i) { + for (std::vector::size_type i = 1; i < resources.size(); ++i) { if (resources[i].block_shape == TensorBlockShapeType::kSkewedInnerDims && *block_shape != TensorBlockShapeType::kSkewedInnerDims) { *block_shape = TensorBlockShapeType::kSkewedInnerDims; -- cgit v1.2.3