From 78f37ca03ce9a6e348858d1b7d7b2c2b0e7fec93 Mon Sep 17 00:00:00 2001 From: Igor Babuschkin Date: Fri, 1 Jul 2016 17:34:31 +0100 Subject: Expose real and imag methods on Tensors --- unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 73bfac40e..142778df8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -209,6 +209,18 @@ class TensorBase return unaryExpr(internal::bind2nd_op >(exponent)); } + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + real() const { + return unaryExpr(internal::scalar_real_op()); + } + + EIGEN_DEVICE_FUNC + EIGEN_STRONG_INLINE const TensorCwiseUnaryOp, const Derived> + imag() const { + return unaryExpr(internal::scalar_imag_op()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TensorCwiseUnaryOp >, const Derived> operator+ (Scalar rhs) const { -- cgit v1.2.3 From c5b893f434be42ac2ad60c0144054539bf2c4967 Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Fri, 29 Jul 2016 18:36:08 +0200 Subject: bug #1266: half implementation has been moved to half_impl namespace --- Eigen/src/Core/arch/CUDA/Half.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index 43a22907b..9df7c2f69 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -523,7 +523,7 @@ __device__ EIGEN_STRONG_INLINE Eigen::half __shfl_xor(Eigen::half var, int laneM // ldg() has an overload for __half, but we also need one for Eigen::half. 
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 350 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) { - return Eigen::internal::raw_uint16_to_half( + return Eigen::half_impl::raw_uint16_to_half( __ldg(reinterpret_cast(ptr))); } #endif -- cgit v1.2.3 From 2693fd54bf9644c94d0c88722659c0e0b2757252 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 29 Jul 2016 13:45:56 -0700 Subject: bug #1266: half implementation has been moved to half_impl namespace --- unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index a48cb1daa..c2a327bf0 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -131,7 +131,7 @@ double loadConstant(const double* address) { } template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Eigen::half loadConstant(const Eigen::half* address) { - return Eigen::half(internal::raw_uint16_to_half(__ldg(&address->x))); + return Eigen::half(half_impl::raw_uint16_to_half(__ldg(&address->x))); } #endif } -- cgit v1.2.3 From 02fe89f5ef060d2348dc67e5a2d3c5b47890e071 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 29 Jul 2016 15:09:34 -0700 Subject: half implementation has been moved to half_impl namespace --- Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 34 +++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index 8931f4662..0bf891d24 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -607,7 +607,7 @@ template<> EIGEN_STRONG_INLINE Packet4h pset1(const Eigen::half& from) } template<> EIGEN_STRONG_INLINE Eigen::half pfirst(const Packet4h& from) { - return 
raw_uint16_to_half(static_cast(_mm_cvtsi64_si32(from.x))); + return half_impl::raw_uint16_to_half(static_cast(_mm_cvtsi64_si32(from.x))); } template<> EIGEN_STRONG_INLINE Packet4h pconj(const Packet4h& a) { return a; } @@ -618,17 +618,17 @@ template<> EIGEN_STRONG_INLINE Packet4h padd(const Packet4h& a, const Eigen::half h[4]; - Eigen::half ha = raw_uint16_to_half(static_cast(a64)); - Eigen::half hb = raw_uint16_to_half(static_cast(b64)); + Eigen::half ha = half_impl::raw_uint16_to_half(static_cast(a64)); + Eigen::half hb = half_impl::raw_uint16_to_half(static_cast(b64)); h[0] = ha + hb; - ha = raw_uint16_to_half(static_cast(a64 >> 16)); - hb = raw_uint16_to_half(static_cast(b64 >> 16)); + ha = half_impl::raw_uint16_to_half(static_cast(a64 >> 16)); + hb = half_impl::raw_uint16_to_half(static_cast(b64 >> 16)); h[1] = ha + hb; - ha = raw_uint16_to_half(static_cast(a64 >> 32)); - hb = raw_uint16_to_half(static_cast(b64 >> 32)); + ha = half_impl::raw_uint16_to_half(static_cast(a64 >> 32)); + hb = half_impl::raw_uint16_to_half(static_cast(b64 >> 32)); h[2] = ha + hb; - ha = raw_uint16_to_half(static_cast(a64 >> 48)); - hb = raw_uint16_to_half(static_cast(b64 >> 48)); + ha = half_impl::raw_uint16_to_half(static_cast(a64 >> 48)); + hb = half_impl::raw_uint16_to_half(static_cast(b64 >> 48)); h[3] = ha + hb; Packet4h result; result.x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x); @@ -641,17 +641,17 @@ template<> EIGEN_STRONG_INLINE Packet4h pmul(const Packet4h& a, const Eigen::half h[4]; - Eigen::half ha = raw_uint16_to_half(static_cast(a64)); - Eigen::half hb = raw_uint16_to_half(static_cast(b64)); + Eigen::half ha = half_impl::raw_uint16_to_half(static_cast(a64)); + Eigen::half hb = half_impl::raw_uint16_to_half(static_cast(b64)); h[0] = ha * hb; - ha = raw_uint16_to_half(static_cast(a64 >> 16)); - hb = raw_uint16_to_half(static_cast(b64 >> 16)); + ha = half_impl::raw_uint16_to_half(static_cast(a64 >> 16)); + hb = half_impl::raw_uint16_to_half(static_cast(b64 >> 16)); 
h[1] = ha * hb; - ha = raw_uint16_to_half(static_cast(a64 >> 32)); - hb = raw_uint16_to_half(static_cast(b64 >> 32)); + ha = half_impl::raw_uint16_to_half(static_cast(a64 >> 32)); + hb = half_impl::raw_uint16_to_half(static_cast(b64 >> 32)); h[2] = ha * hb; - ha = raw_uint16_to_half(static_cast(a64 >> 48)); - hb = raw_uint16_to_half(static_cast(b64 >> 48)); + ha = half_impl::raw_uint16_to_half(static_cast(a64 >> 48)); + hb = half_impl::raw_uint16_to_half(static_cast(b64 >> 48)); h[3] = ha * hb; Packet4h result; result.x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x); -- cgit v1.2.3 From 7995cec90c63380ba05b297d1e196affc371cac9 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Sun, 31 Jul 2016 15:20:22 +0200 Subject: Fix vectorization logic for coeff-based product for some corner cases. --- Eigen/src/Core/ProductEvaluators.h | 4 ++-- test/product_small.cpp | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/Eigen/src/Core/ProductEvaluators.h b/Eigen/src/Core/ProductEvaluators.h index d739c5156..955668bef 100644 --- a/Eigen/src/Core/ProductEvaluators.h +++ b/Eigen/src/Core/ProductEvaluators.h @@ -489,8 +489,8 @@ struct product_evaluator, ProductTag, DenseShape, SameType = is_same::value, - CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit), - CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit), + CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1), + CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime!=1), EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 
0 diff --git a/test/product_small.cpp b/test/product_small.cpp index c35db6f65..3e8dab01e 100644 --- a/test/product_small.cpp +++ b/test/product_small.cpp @@ -177,6 +177,38 @@ void test_lazy_l3() CALL_SUBTEST(( test_lazy_all_layout(4,cols,depth) )); } +template +void test_linear_but_not_vectorizable() +{ + // Check tricky cases for which the result of the product is a vector and thus must exhibit the LinearBit flag, + // but is not vectorizable along the linear dimension. + Index n = N==Dynamic ? internal::random(1,32) : N; + Index m = M==Dynamic ? internal::random(1,32) : M; + Index k = K==Dynamic ? internal::random(1,32) : K; + + { + Matrix A; A.setRandom(n,m+1); + Matrix B; B.setRandom(m*2,k); + Matrix C; + Matrix R; + + C.noalias() = A.template topLeftCorner<1,M>() * (B.template topRows()+B.template bottomRows()); + R.noalias() = A.template topLeftCorner<1,M>() * (B.template topRows()+B.template bottomRows()).eval(); + VERIFY_IS_APPROX(C,R); + } + + { + Matrix A; A.setRandom(m+1,n); + Matrix B; B.setRandom(k,m*2); + Matrix C; + Matrix R; + + C.noalias() = (B.template leftCols()+B.template rightCols()) * A.template topLeftCorner(); + R.noalias() = (B.template leftCols()+B.template rightCols()).eval() * A.template topLeftCorner(); + VERIFY_IS_APPROX(C,R); + } +} + void test_product_small() { for(int i = 0; i < g_repeat; i++) { @@ -202,6 +234,10 @@ void test_product_small() CALL_SUBTEST_41( test_lazy_l1 >() ); CALL_SUBTEST_42( test_lazy_l2 >() ); CALL_SUBTEST_43( test_lazy_l3 >() ); + + CALL_SUBTEST_7(( test_linear_but_not_vectorizable() )); + CALL_SUBTEST_7(( test_linear_but_not_vectorizable() )); + CALL_SUBTEST_7(( test_linear_but_not_vectorizable() )); } #ifdef EIGEN_TEST_PART_6 -- cgit v1.2.3 From e256acec7c2ee0aeab17f067290bfbb944adbe1c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 1 Aug 2016 17:03:39 -0700 Subject: Avoid unnecessary object copies --- unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h | 12 ++++-------- 
unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h | 4 ++-- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h index 34270730b..069680a11 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceThreadPool.h @@ -151,9 +151,7 @@ struct ThreadPoolDevice { template EIGEN_STRONG_INLINE Notification* enqueue(Function&& f, Args&&... args) const { Notification* n = new Notification(); - std::function func = - std::bind(&FunctionWrapperWithNotification::run, n, f, args...); - pool_->Schedule(func); + pool_->Schedule(std::bind(&FunctionWrapperWithNotification::run, n, f, args...)); return n; } @@ -161,15 +159,13 @@ struct ThreadPoolDevice { EIGEN_STRONG_INLINE void enqueue_with_barrier(Barrier* b, Function&& f, Args&&... args) const { - std::function func = std::bind( - &FunctionWrapperWithBarrier::run, b, f, args...); - pool_->Schedule(func); + pool_->Schedule(std::bind( + &FunctionWrapperWithBarrier::run, b, f, args...)); } template EIGEN_STRONG_INLINE void enqueueNoNotification(Function&& f, Args&&... args) const { - std::function func = std::bind(f, args...); - pool_->Schedule(func); + pool_->Schedule(std::bind(f, args...)); } // Returns a logical thread index between 0 and pool_->NumThreads() - 1 if diff --git a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h index d2204ad5b..399f95cc1 100644 --- a/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h +++ b/unsupported/Eigen/CXX11/src/ThreadPool/ThreadEnvironment.h @@ -21,14 +21,14 @@ struct StlThreadEnvironment { // destructor must join the thread. 
class EnvThread { public: - EnvThread(std::function f) : thr_(f) {} + EnvThread(std::function f) : thr_(std::move(f)) {} ~EnvThread() { thr_.join(); } private: std::thread thr_; }; - EnvThread* CreateThread(std::function f) { return new EnvThread(f); } + EnvThread* CreateThread(std::function f) { return new EnvThread(std::move(f)); } Task CreateTask(std::function f) { return Task{std::move(f)}; } void ExecuteTask(const Task& t) { t.f(); } }; -- cgit v1.2.3 From fd220dd8b009dc37b8c914cde25862c1e06e419a Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 1 Aug 2016 18:16:16 -0700 Subject: Use numext::conj instead of std::conj --- unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h index ece2ed91b..08eb5595a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFFT.h @@ -329,7 +329,7 @@ struct TensorEvaluator, D for (Index i = 0; i < n; ++i) { if(FFTDir == FFT_FORWARD) { - a[i] = data[i] * std::conj(pos_j_base_powered[i]); + a[i] = data[i] * numext::conj(pos_j_base_powered[i]); } else { a[i] = data[i] * pos_j_base_powered[i]; @@ -344,7 +344,7 @@ struct TensorEvaluator, D b[i] = pos_j_base_powered[i]; } else { - b[i] = std::conj(pos_j_base_powered[i]); + b[i] = numext::conj(pos_j_base_powered[i]); } } for (Index i = n; i < m - n; ++i) { @@ -355,7 +355,7 @@ struct TensorEvaluator, D b[i] = pos_j_base_powered[m-i]; } else { - b[i] = std::conj(pos_j_base_powered[m-i]); + b[i] = numext::conj(pos_j_base_powered[m-i]); } } @@ -379,7 +379,7 @@ struct TensorEvaluator, D for (Index i = 0; i < n; ++i) { if(FFTDir == FFT_FORWARD) { - data[i] = a[i] * std::conj(pos_j_base_powered[i]); + data[i] = a[i] * numext::conj(pos_j_base_powered[i]); } else { data[i] = a[i] * pos_j_base_powered[i]; -- cgit v1.2.3 From f4404777ff4b8f8ed70a479276c657cb6062465d Mon Sep 17 
00:00:00 2001 From: Christoph Hertzberg Date: Tue, 2 Aug 2016 17:08:57 +0000 Subject: Change project name to Eigen3, to be compatible with FindEigen3.cmake and Eigen3Config.cmake. This is related to pull-requests 214. --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index abae3b23c..b4d28125a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -project(Eigen) +project(Eigen3) cmake_minimum_required(VERSION 2.8.5) -- cgit v1.2.3 From 819d0cea1b8cc27c55c94886c85a0a3efc155a50 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Tue, 2 Aug 2016 23:32:41 +0200 Subject: List PARDISO solver. --- doc/SparseLinearSystems.dox | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/SparseLinearSystems.dox b/doc/SparseLinearSystems.dox index ee4f53a4e..fc33b93e7 100644 --- a/doc/SparseLinearSystems.dox +++ b/doc/SparseLinearSystems.dox @@ -76,6 +76,9 @@ They are summarized in the following tables: SPQR\link SPQRSupport_Module SPQRSupport \endlink QR factorization Any, rectangularfill-in reducing, multithreaded, fast dense algebra requires the SuiteSparse package, \b GPL recommended for linear least-squares problems, has a rank-revealing feature +PardisoLLT \n PardisoLDLT \n PardisoLU\link PardisoSupport_Module PardisoSupport \endlinkDirect LLt, LDLt, LU factorizationsSPD \n SPD \n SquareFill-in reducing, Leverage fast dense algebra, Multithreading + Requires the Intel MKL package, \b Proprietary + optimized for tough problems patterns, see also \link TopicUsingIntelMKL using MKL with Eigen \endlink Here \c SPD means symmetric positive definite. -- cgit v1.2.3 From a20b58845f5f457375a91ec7e8acdeee2f920d33 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 3 Aug 2016 10:00:43 -0700 Subject: CUDA_ARCH isn't always defined, so avoid relying on it too much when figuring out which implementation to use for reductions. Instead rely on the device to tell us on which hardware version we're running. 
--- unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h index d3894e625..5e512490c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionCuda.h @@ -336,11 +336,9 @@ struct FullReducer { static const bool HasOptimizedImplementation = !Op::IsStateful && (internal::is_same::value || (internal::is_same::value && reducer_traits::PacketAccess)); -#elif __CUDA_ARCH__ >= 300 +#else static const bool HasOptimizedImplementation = !Op::IsStateful && internal::is_same::value; -#else - static const bool HasOptimizedImplementation = false; #endif template @@ -619,11 +617,9 @@ struct InnerReducer { static const bool HasOptimizedImplementation = !Op::IsStateful && (internal::is_same::value || (internal::is_same::value && reducer_traits::PacketAccess)); -#elif __CUDA_ARCH__ >= 300 +#else static const bool HasOptimizedImplementation = !Op::IsStateful && internal::is_same::value; -#else - static const bool HasOptimizedImplementation = false; #endif template @@ -678,12 +674,8 @@ struct OuterReducer { // Unfortunately nvidia doesn't support well exotic types such as complex, // so reduce the scope of the optimized version of the code to the simple case // of floats. 
-#if __CUDA_ARCH__ >= 300 static const bool HasOptimizedImplementation = !Op::IsStateful && internal::is_same::value; -#else - static const bool HasOptimizedImplementation = false; -#endif template static EIGEN_DEVICE_FUNC bool run(const Self&, Op&, const Device&, OutputType*, typename Self::Index, typename Self::Index) { -- cgit v1.2.3 From 81099ef48272ccb640baf1a907c37e40a121e590 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 3 Aug 2016 11:41:17 -0700 Subject: Added a test for fp16 --- unsupported/test/CMakeLists.txt | 3 + unsupported/test/cxx11_float16.cpp | 184 +++++++++++++++++++++++++++++++++++++ 2 files changed, 187 insertions(+) create mode 100644 unsupported/test/cxx11_float16.cpp diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 6188b421a..b29b694e7 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -142,6 +142,9 @@ if(EIGEN_TEST_CXX11) # older compiler that don't support cxx11. set(CMAKE_CXX_STANDARD 11) + # Todo: remove the c++11 bits from this test to make it work with older compilers. + ei_add_test(cxx11_float16) + ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}") ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}") ei_add_test(cxx11_non_blocking_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}") diff --git a/unsupported/test/cxx11_float16.cpp b/unsupported/test/cxx11_float16.cpp new file mode 100644 index 000000000..69dfd7e80 --- /dev/null +++ b/unsupported/test/cxx11_float16.cpp @@ -0,0 +1,184 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_float16 +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_GPU + + +#include "main.h" +#include + +using Eigen::half; + +void test_conversion() +{ + // Conversion from float. + VERIFY_IS_EQUAL(Eigen::half(1.0f).x, 0x3c00); + VERIFY_IS_EQUAL(Eigen::half(0.5f).x, 0x3800); + VERIFY_IS_EQUAL(Eigen::half(0.33333f).x, 0x3555); + VERIFY_IS_EQUAL(Eigen::half(0.0f).x, 0x0000); + VERIFY_IS_EQUAL(Eigen::half(-0.0f).x, 0x8000); + VERIFY_IS_EQUAL(Eigen::half(65504.0f).x, 0x7bff); + VERIFY_IS_EQUAL(Eigen::half(65536.0f).x, 0x7c00); // Becomes infinity. + + // Denormals. + VERIFY_IS_EQUAL(Eigen::half(-5.96046e-08f).x, 0x8001); + VERIFY_IS_EQUAL(Eigen::half(5.96046e-08f).x, 0x0001); + VERIFY_IS_EQUAL(Eigen::half(1.19209e-07f).x, 0x0002); + + // Verify round-to-nearest-even behavior. + float val1 = float(Eigen::half(half_impl::__half{0x3c00})); + float val2 = float(Eigen::half(half_impl::__half{0x3c01})); + float val3 = float(Eigen::half(half_impl::__half{0x3c02})); + VERIFY_IS_EQUAL(Eigen::half(0.5 * (val1 + val2)).x, 0x3c00); + VERIFY_IS_EQUAL(Eigen::half(0.5 * (val2 + val3)).x, 0x3c02); + + // Conversion from int. + VERIFY_IS_EQUAL(Eigen::half(-1).x, 0xbc00); + VERIFY_IS_EQUAL(Eigen::half(0).x, 0x0000); + VERIFY_IS_EQUAL(Eigen::half(1).x, 0x3c00); + VERIFY_IS_EQUAL(Eigen::half(2).x, 0x4000); + VERIFY_IS_EQUAL(Eigen::half(3).x, 0x4200); + + // Conversion from bool. + VERIFY_IS_EQUAL(Eigen::half(false).x, 0x0000); + VERIFY_IS_EQUAL(Eigen::half(true).x, 0x3c00); + + // Conversion to float. + VERIFY_IS_EQUAL(float(Eigen::half(half_impl::__half{0x0000})), 0.0f); + VERIFY_IS_EQUAL(float(Eigen::half(half_impl::__half{0x3c00})), 1.0f); + + // Denormals. 
+ VERIFY_IS_APPROX(float(Eigen::half(half_impl::__half{0x8001})), -5.96046e-08f); + VERIFY_IS_APPROX(float(Eigen::half(half_impl::__half{0x0001})), 5.96046e-08f); + VERIFY_IS_APPROX(float(Eigen::half(half_impl::__half{0x0002})), 1.19209e-07f); + + // NaNs and infinities. + VERIFY(!isinf(float(Eigen::half(65504.0f)))); // Largest finite number. + VERIFY(!isnan(float(Eigen::half(0.0f)))); + VERIFY(isinf(float(Eigen::half(half_impl::__half{0xfc00})))); + VERIFY(isnan(float(Eigen::half(half_impl::__half{0xfc01})))); + VERIFY(isinf(float(Eigen::half(half_impl::__half{0x7c00})))); + VERIFY(isnan(float(Eigen::half(half_impl::__half{0x7c01})))); + VERIFY(isnan(float(Eigen::half(0.0 / 0.0)))); + VERIFY(isinf(float(Eigen::half(1.0 / 0.0)))); + VERIFY(isinf(float(Eigen::half(-1.0 / 0.0)))); + + // Exactly same checks as above, just directly on the half representation. + VERIFY(!numext::isinf(Eigen::half(half_impl::__half{0x7bff}))); + VERIFY(!numext::isnan(Eigen::half(half_impl::__half{0x0000}))); + VERIFY(numext::isinf(Eigen::half(half_impl::__half{0xfc00}))); + VERIFY(numext::isnan(Eigen::half(half_impl::__half{0xfc01}))); + VERIFY(numext::isinf(Eigen::half(half_impl::__half{0x7c00}))); + VERIFY(numext::isnan(Eigen::half(half_impl::__half{0x7c01}))); + VERIFY(numext::isnan(Eigen::half(0.0 / 0.0))); + VERIFY(numext::isinf(Eigen::half(1.0 / 0.0))); + VERIFY(numext::isinf(Eigen::half(-1.0 / 0.0))); +} + +void test_arithmetic() +{ + VERIFY_IS_EQUAL(float(Eigen::half(2) + Eigen::half(2)), 4); + VERIFY_IS_EQUAL(float(Eigen::half(2) + Eigen::half(-2)), 0); + VERIFY_IS_APPROX(float(Eigen::half(0.33333f) + Eigen::half(0.66667f)), 1.0f); + VERIFY_IS_EQUAL(float(Eigen::half(2.0f) * Eigen::half(-5.5f)), -11.0f); + VERIFY_IS_APPROX(float(Eigen::half(1.0f) / Eigen::half(3.0f)), 0.33333f); + VERIFY_IS_EQUAL(float(-Eigen::half(4096.0f)), -4096.0f); + VERIFY_IS_EQUAL(float(-Eigen::half(-4096.0f)), 4096.0f); +} + +void test_comparison() +{ + VERIFY(Eigen::half(1.0f) > Eigen::half(0.5f)); + 
VERIFY(Eigen::half(0.5f) < Eigen::half(1.0f)); + VERIFY(!(Eigen::half(1.0f) < Eigen::half(0.5f))); + VERIFY(!(Eigen::half(0.5f) > Eigen::half(1.0f))); + + VERIFY(!(Eigen::half(4.0f) > Eigen::half(4.0f))); + VERIFY(!(Eigen::half(4.0f) < Eigen::half(4.0f))); + + VERIFY(!(Eigen::half(0.0f) < Eigen::half(-0.0f))); + VERIFY(!(Eigen::half(-0.0f) < Eigen::half(0.0f))); + VERIFY(!(Eigen::half(0.0f) > Eigen::half(-0.0f))); + VERIFY(!(Eigen::half(-0.0f) > Eigen::half(0.0f))); + + VERIFY(Eigen::half(0.2f) > Eigen::half(-1.0f)); + VERIFY(Eigen::half(-1.0f) < Eigen::half(0.2f)); + VERIFY(Eigen::half(-16.0f) < Eigen::half(-15.0f)); + + VERIFY(Eigen::half(1.0f) == Eigen::half(1.0f)); + VERIFY(Eigen::half(1.0f) != Eigen::half(2.0f)); + + // Comparisons with NaNs and infinities. + VERIFY(!(Eigen::half(0.0 / 0.0) == Eigen::half(0.0 / 0.0))); + VERIFY(!(Eigen::half(0.0 / 0.0) != Eigen::half(0.0 / 0.0))); + + VERIFY(!(Eigen::half(1.0) == Eigen::half(0.0 / 0.0))); + VERIFY(!(Eigen::half(1.0) < Eigen::half(0.0 / 0.0))); + VERIFY(!(Eigen::half(1.0) > Eigen::half(0.0 / 0.0))); + VERIFY(!(Eigen::half(1.0) != Eigen::half(0.0 / 0.0))); + + VERIFY(Eigen::half(1.0) < Eigen::half(1.0 / 0.0)); + VERIFY(Eigen::half(1.0) > Eigen::half(-1.0 / 0.0)); +} + + +void test_basic_functions() +{ + VERIFY_IS_EQUAL(float(numext::abs(Eigen::half(3.5f))), 3.5f); + VERIFY_IS_EQUAL(float(numext::abs(Eigen::half(-3.5f))), 3.5f); + + VERIFY_IS_EQUAL(float(numext::floor(Eigen::half(3.5f))), 3.0f); + VERIFY_IS_EQUAL(float(numext::floor(Eigen::half(-3.5f))), -4.0f); + + VERIFY_IS_EQUAL(float(numext::ceil(Eigen::half(3.5f))), 4.0f); + VERIFY_IS_EQUAL(float(numext::ceil(Eigen::half(-3.5f))), -3.0f); + + VERIFY_IS_APPROX(float(numext::sqrt(Eigen::half(0.0f))), 0.0f); + VERIFY_IS_APPROX(float(numext::sqrt(Eigen::half(4.0f))), 2.0f); + + VERIFY_IS_APPROX(float(numext::pow(Eigen::half(0.0f), Eigen::half(1.0f))), 0.0f); + VERIFY_IS_APPROX(float(numext::pow(Eigen::half(2.0f), Eigen::half(2.0f))), 4.0f); + + 
VERIFY_IS_EQUAL(float(numext::exp(Eigen::half(0.0f))), 1.0f); + VERIFY_IS_APPROX(float(numext::exp(Eigen::half(EIGEN_PI))), float(20.0 + EIGEN_PI)); + + VERIFY_IS_EQUAL(float(numext::log(Eigen::half(1.0f))), 0.0f); + VERIFY_IS_APPROX(float(numext::log(Eigen::half(10.0f))), 2.30273f); +} + +void test_trigonometric_functions() +{ + VERIFY_IS_APPROX(numext::cos(Eigen::half(0.0f)), Eigen::half(cosf(0.0f))); + VERIFY_IS_APPROX(numext::cos(Eigen::half(EIGEN_PI)), Eigen::half(cosf(EIGEN_PI))); + VERIFY_IS_APPROX_OR_LESS_THAN(numext::cos(Eigen::half(EIGEN_PI/2)), NumTraits::epsilon() * Eigen::half(5)); + VERIFY_IS_APPROX_OR_LESS_THAN(numext::cos(Eigen::half(3*EIGEN_PI/2)), NumTraits::epsilon() * Eigen::half(5)); + VERIFY_IS_APPROX(numext::cos(Eigen::half(3.5f)), Eigen::half(cosf(3.5f))); + + VERIFY_IS_APPROX(numext::sin(Eigen::half(0.0f)), Eigen::half(sinf(0.0f))); + VERIFY_IS_APPROX_OR_LESS_THAN(numext::sin(Eigen::half(EIGEN_PI)), NumTraits::epsilon() * Eigen::half(10)); + + VERIFY_IS_APPROX(numext::sin(Eigen::half(EIGEN_PI/2)), Eigen::half(sinf(EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::sin(Eigen::half(3*EIGEN_PI/2)), Eigen::half(sinf(3*EIGEN_PI/2))); + VERIFY_IS_APPROX(numext::sin(Eigen::half(3.5f)), Eigen::half(sinf(3.5f))); + + VERIFY_IS_APPROX(numext::tan(Eigen::half(0.0f)), Eigen::half(tanf(0.0f))); + VERIFY_IS_APPROX_OR_LESS_THAN(numext::tan(Eigen::half(EIGEN_PI)), NumTraits::epsilon() * Eigen::half(10)); + VERIFY_IS_APPROX(numext::tan(Eigen::half(3.5f)), Eigen::half(tanf(3.5f))); +} + +void test_cxx11_float16() +{ + CALL_SUBTEST(test_conversion()); + CALL_SUBTEST(test_arithmetic()); + CALL_SUBTEST(test_comparison()); + CALL_SUBTEST(test_basic_functions()); + CALL_SUBTEST(test_trigonometric_functions()); +} -- cgit v1.2.3 From d92df04ce893b75c9d2c8ebc463c17a05425edb7 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 3 Aug 2016 11:50:07 -0700 Subject: Cleaned up the new float16 test a bit --- unsupported/test/cxx11_float16.cpp | 46 
++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/unsupported/test/cxx11_float16.cpp b/unsupported/test/cxx11_float16.cpp index 69dfd7e80..f646c9750 100644 --- a/unsupported/test/cxx11_float16.cpp +++ b/unsupported/test/cxx11_float16.cpp @@ -11,12 +11,14 @@ #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_GPU +// Make sure it's possible to forward declare Eigen::half +//namespace Eigen { +//struct half; +//} #include "main.h" #include -using Eigen::half; - void test_conversion() { // Conversion from float. @@ -37,8 +39,8 @@ void test_conversion() float val1 = float(Eigen::half(half_impl::__half{0x3c00})); float val2 = float(Eigen::half(half_impl::__half{0x3c01})); float val3 = float(Eigen::half(half_impl::__half{0x3c02})); - VERIFY_IS_EQUAL(Eigen::half(0.5 * (val1 + val2)).x, 0x3c00); - VERIFY_IS_EQUAL(Eigen::half(0.5 * (val2 + val3)).x, 0x3c02); + VERIFY_IS_EQUAL(Eigen::half(0.5f * (val1 + val2)).x, 0x3c00); + VERIFY_IS_EQUAL(Eigen::half(0.5f * (val2 + val3)).x, 0x3c02); // Conversion from int. VERIFY_IS_EQUAL(Eigen::half(-1).x, 0xbc00); @@ -61,26 +63,26 @@ void test_conversion() VERIFY_IS_APPROX(float(Eigen::half(half_impl::__half{0x0002})), 1.19209e-07f); // NaNs and infinities. - VERIFY(!isinf(float(Eigen::half(65504.0f)))); // Largest finite number. - VERIFY(!isnan(float(Eigen::half(0.0f)))); - VERIFY(isinf(float(Eigen::half(half_impl::__half{0xfc00})))); - VERIFY(isnan(float(Eigen::half(half_impl::__half{0xfc01})))); - VERIFY(isinf(float(Eigen::half(half_impl::__half{0x7c00})))); - VERIFY(isnan(float(Eigen::half(half_impl::__half{0x7c01})))); - VERIFY(isnan(float(Eigen::half(0.0 / 0.0)))); - VERIFY(isinf(float(Eigen::half(1.0 / 0.0)))); - VERIFY(isinf(float(Eigen::half(-1.0 / 0.0)))); + VERIFY(!(isinf)(float(Eigen::half(65504.0f)))); // Largest finite number. 
+ VERIFY(!(isnan)(float(Eigen::half(0.0f)))); + VERIFY((isinf)(float(Eigen::half(half_impl::__half{0xfc00})))); + VERIFY((isnan)(float(Eigen::half(half_impl::__half{0xfc01})))); + VERIFY((isinf)(float(Eigen::half(half_impl::__half{0x7c00})))); + VERIFY((isnan)(float(Eigen::half(half_impl::__half{0x7c01})))); + VERIFY((isnan)(float(Eigen::half(0.0 / 0.0)))); + VERIFY((isinf)(float(Eigen::half(1.0 / 0.0)))); + VERIFY((isinf)(float(Eigen::half(-1.0 / 0.0)))); // Exactly same checks as above, just directly on the half representation. - VERIFY(!numext::isinf(Eigen::half(half_impl::__half{0x7bff}))); - VERIFY(!numext::isnan(Eigen::half(half_impl::__half{0x0000}))); - VERIFY(numext::isinf(Eigen::half(half_impl::__half{0xfc00}))); - VERIFY(numext::isnan(Eigen::half(half_impl::__half{0xfc01}))); - VERIFY(numext::isinf(Eigen::half(half_impl::__half{0x7c00}))); - VERIFY(numext::isnan(Eigen::half(half_impl::__half{0x7c01}))); - VERIFY(numext::isnan(Eigen::half(0.0 / 0.0))); - VERIFY(numext::isinf(Eigen::half(1.0 / 0.0))); - VERIFY(numext::isinf(Eigen::half(-1.0 / 0.0))); + VERIFY(!(numext::isinf)(Eigen::half(half_impl::__half{0x7bff}))); + VERIFY(!(numext::isnan)(Eigen::half(half_impl::__half{0x0000}))); + VERIFY((numext::isinf)(Eigen::half(half_impl::__half{0xfc00}))); + VERIFY((numext::isnan)(Eigen::half(half_impl::__half{0xfc01}))); + VERIFY((numext::isinf)(Eigen::half(half_impl::__half{0x7c00}))); + VERIFY((numext::isnan)(Eigen::half(half_impl::__half{0x7c01}))); + VERIFY((numext::isnan)(Eigen::half(0.0 / 0.0))); + VERIFY((numext::isinf)(Eigen::half(1.0 / 0.0))); + VERIFY((numext::isinf)(Eigen::half(-1.0 / 0.0))); } void test_arithmetic() -- cgit v1.2.3 From 17b9a55d98cf9fba419d1b43c8723bcbeb7e58f0 Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Thu, 4 Aug 2016 00:00:43 +0200 Subject: Move Eigen::half_impl::half to Eigen::half while preserving the free functions to the Eigen::half_impl namespace together with ADL --- Eigen/src/Core/arch/CUDA/Half.h | 72 
+++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index 9df7c2f69..6ae2c53c5 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -45,6 +45,8 @@ namespace Eigen { +struct half; + namespace half_impl { #if !defined(EIGEN_HAS_CUDA_FP16) @@ -62,60 +64,72 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half raw_uint16_to_half(unsigned short x EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff); EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h); +struct half_base : public __half { + explicit EIGEN_DEVICE_FUNC half_base(unsigned short raw) : __half(raw) {} + EIGEN_DEVICE_FUNC half_base() {} + EIGEN_DEVICE_FUNC half_base(const __half& h) : __half(h) {} +}; + +} // namespace half_impl + // Class definition. -struct half : public __half { +struct half : public half_impl::half_base { + #if !defined(EIGEN_HAS_CUDA_FP16) + typedef half_impl::__half __half; + #endif + EIGEN_DEVICE_FUNC half() {} - EIGEN_DEVICE_FUNC half(const __half& h) : __half(h) {} - EIGEN_DEVICE_FUNC half(const half& h) : __half(h) {} + EIGEN_DEVICE_FUNC half(const __half& h) : half_impl::half_base(h) {} + EIGEN_DEVICE_FUNC half(const half& h) : half_impl::half_base(h) {} explicit EIGEN_DEVICE_FUNC half(bool b) - : __half(raw_uint16_to_half(b ? 0x3c00 : 0)) {} + : half_impl::half_base(half_impl::raw_uint16_to_half(b ? 0x3c00 : 0)) {} template explicit EIGEN_DEVICE_FUNC half(const T& val) - : __half(float_to_half_rtne(static_cast(val))) {} + : half_impl::half_base(half_impl::float_to_half_rtne(static_cast(val))) {} explicit EIGEN_DEVICE_FUNC half(float f) - : __half(float_to_half_rtne(f)) {} + : half_impl::half_base(half_impl::float_to_half_rtne(f)) {} EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(bool) const { // +0.0 and -0.0 become false, everything else becomes true. 
return (x & 0x7fff) != 0; } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(signed char) const { - return static_cast(half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned char) const { - return static_cast(half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(short) const { - return static_cast(half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned short) const { - return static_cast(half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(int) const { - return static_cast(half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned int) const { - return static_cast(half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long) const { - return static_cast(half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long) const { - return static_cast(half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(long long) const { - return static_cast(half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(unsigned long long) const { return static_cast(half_to_float(*this)); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(float) const { - return half_to_float(*this); + return half_impl::half_to_float(*this); } EIGEN_DEVICE_FUNC EIGEN_EXPLICIT_CAST(double) const { - return static_cast(half_to_float(*this)); + return static_cast(half_impl::half_to_float(*this)); } EIGEN_DEVICE_FUNC half& operator=(const half& other) { @@ -124,6 +138,8 @@ struct half : public __half { } }; +namespace half_impl { + #if 
defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 // Intrinsics for native fp16 support. Note that on current hardware, @@ -430,12 +446,12 @@ EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const half& v) } // end namespace half_impl // import Eigen::half_impl::half into Eigen namespace -using half_impl::half; +// using half_impl::half; namespace internal { template<> -struct random_default_impl +struct random_default_impl { static inline half run(const half& x, const half& y) { @@ -447,27 +463,27 @@ struct random_default_impl } }; -template<> struct is_arithmetic { enum { value = true }; }; +template<> struct is_arithmetic { enum { value = true }; }; } // end namespace internal -template<> struct NumTraits - : GenericNumTraits +template<> struct NumTraits + : GenericNumTraits { - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half_impl::half epsilon() { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half epsilon() { return half_impl::raw_uint16_to_half(0x0800); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half_impl::half dummy_precision() { return half_impl::half(1e-2f); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half_impl::half highest() { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half dummy_precision() { return Eigen::half(1e-2f); } + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half highest() { return half_impl::raw_uint16_to_half(0x7bff); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half_impl::half lowest() { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half lowest() { return half_impl::raw_uint16_to_half(0xfbff); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half_impl::half infinity() { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half infinity() { return half_impl::raw_uint16_to_half(0x7c00); } - EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Eigen::half_impl::half quiet_NaN() { + EIGEN_DEVICE_FUNC static 
EIGEN_STRONG_INLINE Eigen::half quiet_NaN() { return half_impl::raw_uint16_to_half(0x7c01); } }; -- cgit v1.2.3 From 373bb12dc69bfd5d8a5a767c148b2899efd8f9df Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 3 Aug 2016 16:07:31 -0700 Subject: Check that it's possible to forward declare the hlaf type. --- test/half_float.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/half_float.cpp b/test/half_float.cpp index 289e4f662..6f299a346 100644 --- a/test/half_float.cpp +++ b/test/half_float.cpp @@ -11,6 +11,11 @@ #include +// Make sure it's possible to forward declare Eigen::half +namespace Eigen { +struct half; +} + using Eigen::half; void test_conversion() -- cgit v1.2.3 From fad9828769450db81155a1e5a18108fb93e30f88 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 3 Aug 2016 16:08:37 -0700 Subject: Deleted redundant regression test. --- unsupported/test/CMakeLists.txt | 3 - unsupported/test/cxx11_float16.cpp | 186 ------------------------------------- 2 files changed, 189 deletions(-) delete mode 100644 unsupported/test/cxx11_float16.cpp diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index b29b694e7..6188b421a 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -142,9 +142,6 @@ if(EIGEN_TEST_CXX11) # older compiler that don't support cxx11. set(CMAKE_CXX_STANDARD 11) - # Todo: remove the c++11 bits from this test to make it work with older compilers. 
- ei_add_test(cxx11_float16) - ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}") ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}") ei_add_test(cxx11_non_blocking_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}") diff --git a/unsupported/test/cxx11_float16.cpp b/unsupported/test/cxx11_float16.cpp deleted file mode 100644 index f646c9750..000000000 --- a/unsupported/test/cxx11_float16.cpp +++ /dev/null @@ -1,186 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_float16 -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int -#define EIGEN_USE_GPU - -// Make sure it's possible to forward declare Eigen::half -//namespace Eigen { -//struct half; -//} - -#include "main.h" -#include - -void test_conversion() -{ - // Conversion from float. - VERIFY_IS_EQUAL(Eigen::half(1.0f).x, 0x3c00); - VERIFY_IS_EQUAL(Eigen::half(0.5f).x, 0x3800); - VERIFY_IS_EQUAL(Eigen::half(0.33333f).x, 0x3555); - VERIFY_IS_EQUAL(Eigen::half(0.0f).x, 0x0000); - VERIFY_IS_EQUAL(Eigen::half(-0.0f).x, 0x8000); - VERIFY_IS_EQUAL(Eigen::half(65504.0f).x, 0x7bff); - VERIFY_IS_EQUAL(Eigen::half(65536.0f).x, 0x7c00); // Becomes infinity. - - // Denormals. - VERIFY_IS_EQUAL(Eigen::half(-5.96046e-08f).x, 0x8001); - VERIFY_IS_EQUAL(Eigen::half(5.96046e-08f).x, 0x0001); - VERIFY_IS_EQUAL(Eigen::half(1.19209e-07f).x, 0x0002); - - // Verify round-to-nearest-even behavior. 
- float val1 = float(Eigen::half(half_impl::__half{0x3c00})); - float val2 = float(Eigen::half(half_impl::__half{0x3c01})); - float val3 = float(Eigen::half(half_impl::__half{0x3c02})); - VERIFY_IS_EQUAL(Eigen::half(0.5f * (val1 + val2)).x, 0x3c00); - VERIFY_IS_EQUAL(Eigen::half(0.5f * (val2 + val3)).x, 0x3c02); - - // Conversion from int. - VERIFY_IS_EQUAL(Eigen::half(-1).x, 0xbc00); - VERIFY_IS_EQUAL(Eigen::half(0).x, 0x0000); - VERIFY_IS_EQUAL(Eigen::half(1).x, 0x3c00); - VERIFY_IS_EQUAL(Eigen::half(2).x, 0x4000); - VERIFY_IS_EQUAL(Eigen::half(3).x, 0x4200); - - // Conversion from bool. - VERIFY_IS_EQUAL(Eigen::half(false).x, 0x0000); - VERIFY_IS_EQUAL(Eigen::half(true).x, 0x3c00); - - // Conversion to float. - VERIFY_IS_EQUAL(float(Eigen::half(half_impl::__half{0x0000})), 0.0f); - VERIFY_IS_EQUAL(float(Eigen::half(half_impl::__half{0x3c00})), 1.0f); - - // Denormals. - VERIFY_IS_APPROX(float(Eigen::half(half_impl::__half{0x8001})), -5.96046e-08f); - VERIFY_IS_APPROX(float(Eigen::half(half_impl::__half{0x0001})), 5.96046e-08f); - VERIFY_IS_APPROX(float(Eigen::half(half_impl::__half{0x0002})), 1.19209e-07f); - - // NaNs and infinities. - VERIFY(!(isinf)(float(Eigen::half(65504.0f)))); // Largest finite number. - VERIFY(!(isnan)(float(Eigen::half(0.0f)))); - VERIFY((isinf)(float(Eigen::half(half_impl::__half{0xfc00})))); - VERIFY((isnan)(float(Eigen::half(half_impl::__half{0xfc01})))); - VERIFY((isinf)(float(Eigen::half(half_impl::__half{0x7c00})))); - VERIFY((isnan)(float(Eigen::half(half_impl::__half{0x7c01})))); - VERIFY((isnan)(float(Eigen::half(0.0 / 0.0)))); - VERIFY((isinf)(float(Eigen::half(1.0 / 0.0)))); - VERIFY((isinf)(float(Eigen::half(-1.0 / 0.0)))); - - // Exactly same checks as above, just directly on the half representation. 
- VERIFY(!(numext::isinf)(Eigen::half(half_impl::__half{0x7bff}))); - VERIFY(!(numext::isnan)(Eigen::half(half_impl::__half{0x0000}))); - VERIFY((numext::isinf)(Eigen::half(half_impl::__half{0xfc00}))); - VERIFY((numext::isnan)(Eigen::half(half_impl::__half{0xfc01}))); - VERIFY((numext::isinf)(Eigen::half(half_impl::__half{0x7c00}))); - VERIFY((numext::isnan)(Eigen::half(half_impl::__half{0x7c01}))); - VERIFY((numext::isnan)(Eigen::half(0.0 / 0.0))); - VERIFY((numext::isinf)(Eigen::half(1.0 / 0.0))); - VERIFY((numext::isinf)(Eigen::half(-1.0 / 0.0))); -} - -void test_arithmetic() -{ - VERIFY_IS_EQUAL(float(Eigen::half(2) + Eigen::half(2)), 4); - VERIFY_IS_EQUAL(float(Eigen::half(2) + Eigen::half(-2)), 0); - VERIFY_IS_APPROX(float(Eigen::half(0.33333f) + Eigen::half(0.66667f)), 1.0f); - VERIFY_IS_EQUAL(float(Eigen::half(2.0f) * Eigen::half(-5.5f)), -11.0f); - VERIFY_IS_APPROX(float(Eigen::half(1.0f) / Eigen::half(3.0f)), 0.33333f); - VERIFY_IS_EQUAL(float(-Eigen::half(4096.0f)), -4096.0f); - VERIFY_IS_EQUAL(float(-Eigen::half(-4096.0f)), 4096.0f); -} - -void test_comparison() -{ - VERIFY(Eigen::half(1.0f) > Eigen::half(0.5f)); - VERIFY(Eigen::half(0.5f) < Eigen::half(1.0f)); - VERIFY(!(Eigen::half(1.0f) < Eigen::half(0.5f))); - VERIFY(!(Eigen::half(0.5f) > Eigen::half(1.0f))); - - VERIFY(!(Eigen::half(4.0f) > Eigen::half(4.0f))); - VERIFY(!(Eigen::half(4.0f) < Eigen::half(4.0f))); - - VERIFY(!(Eigen::half(0.0f) < Eigen::half(-0.0f))); - VERIFY(!(Eigen::half(-0.0f) < Eigen::half(0.0f))); - VERIFY(!(Eigen::half(0.0f) > Eigen::half(-0.0f))); - VERIFY(!(Eigen::half(-0.0f) > Eigen::half(0.0f))); - - VERIFY(Eigen::half(0.2f) > Eigen::half(-1.0f)); - VERIFY(Eigen::half(-1.0f) < Eigen::half(0.2f)); - VERIFY(Eigen::half(-16.0f) < Eigen::half(-15.0f)); - - VERIFY(Eigen::half(1.0f) == Eigen::half(1.0f)); - VERIFY(Eigen::half(1.0f) != Eigen::half(2.0f)); - - // Comparisons with NaNs and infinities. 
- VERIFY(!(Eigen::half(0.0 / 0.0) == Eigen::half(0.0 / 0.0))); - VERIFY(!(Eigen::half(0.0 / 0.0) != Eigen::half(0.0 / 0.0))); - - VERIFY(!(Eigen::half(1.0) == Eigen::half(0.0 / 0.0))); - VERIFY(!(Eigen::half(1.0) < Eigen::half(0.0 / 0.0))); - VERIFY(!(Eigen::half(1.0) > Eigen::half(0.0 / 0.0))); - VERIFY(!(Eigen::half(1.0) != Eigen::half(0.0 / 0.0))); - - VERIFY(Eigen::half(1.0) < Eigen::half(1.0 / 0.0)); - VERIFY(Eigen::half(1.0) > Eigen::half(-1.0 / 0.0)); -} - - -void test_basic_functions() -{ - VERIFY_IS_EQUAL(float(numext::abs(Eigen::half(3.5f))), 3.5f); - VERIFY_IS_EQUAL(float(numext::abs(Eigen::half(-3.5f))), 3.5f); - - VERIFY_IS_EQUAL(float(numext::floor(Eigen::half(3.5f))), 3.0f); - VERIFY_IS_EQUAL(float(numext::floor(Eigen::half(-3.5f))), -4.0f); - - VERIFY_IS_EQUAL(float(numext::ceil(Eigen::half(3.5f))), 4.0f); - VERIFY_IS_EQUAL(float(numext::ceil(Eigen::half(-3.5f))), -3.0f); - - VERIFY_IS_APPROX(float(numext::sqrt(Eigen::half(0.0f))), 0.0f); - VERIFY_IS_APPROX(float(numext::sqrt(Eigen::half(4.0f))), 2.0f); - - VERIFY_IS_APPROX(float(numext::pow(Eigen::half(0.0f), Eigen::half(1.0f))), 0.0f); - VERIFY_IS_APPROX(float(numext::pow(Eigen::half(2.0f), Eigen::half(2.0f))), 4.0f); - - VERIFY_IS_EQUAL(float(numext::exp(Eigen::half(0.0f))), 1.0f); - VERIFY_IS_APPROX(float(numext::exp(Eigen::half(EIGEN_PI))), float(20.0 + EIGEN_PI)); - - VERIFY_IS_EQUAL(float(numext::log(Eigen::half(1.0f))), 0.0f); - VERIFY_IS_APPROX(float(numext::log(Eigen::half(10.0f))), 2.30273f); -} - -void test_trigonometric_functions() -{ - VERIFY_IS_APPROX(numext::cos(Eigen::half(0.0f)), Eigen::half(cosf(0.0f))); - VERIFY_IS_APPROX(numext::cos(Eigen::half(EIGEN_PI)), Eigen::half(cosf(EIGEN_PI))); - VERIFY_IS_APPROX_OR_LESS_THAN(numext::cos(Eigen::half(EIGEN_PI/2)), NumTraits::epsilon() * Eigen::half(5)); - VERIFY_IS_APPROX_OR_LESS_THAN(numext::cos(Eigen::half(3*EIGEN_PI/2)), NumTraits::epsilon() * Eigen::half(5)); - VERIFY_IS_APPROX(numext::cos(Eigen::half(3.5f)), Eigen::half(cosf(3.5f))); 
- - VERIFY_IS_APPROX(numext::sin(Eigen::half(0.0f)), Eigen::half(sinf(0.0f))); - VERIFY_IS_APPROX_OR_LESS_THAN(numext::sin(Eigen::half(EIGEN_PI)), NumTraits::epsilon() * Eigen::half(10)); - - VERIFY_IS_APPROX(numext::sin(Eigen::half(EIGEN_PI/2)), Eigen::half(sinf(EIGEN_PI/2))); - VERIFY_IS_APPROX(numext::sin(Eigen::half(3*EIGEN_PI/2)), Eigen::half(sinf(3*EIGEN_PI/2))); - VERIFY_IS_APPROX(numext::sin(Eigen::half(3.5f)), Eigen::half(sinf(3.5f))); - - VERIFY_IS_APPROX(numext::tan(Eigen::half(0.0f)), Eigen::half(tanf(0.0f))); - VERIFY_IS_APPROX_OR_LESS_THAN(numext::tan(Eigen::half(EIGEN_PI)), NumTraits::epsilon() * Eigen::half(10)); - VERIFY_IS_APPROX(numext::tan(Eigen::half(3.5f)), Eigen::half(tanf(3.5f))); -} - -void test_cxx11_float16() -{ - CALL_SUBTEST(test_conversion()); - CALL_SUBTEST(test_arithmetic()); - CALL_SUBTEST(test_comparison()); - CALL_SUBTEST(test_basic_functions()); - CALL_SUBTEST(test_trigonometric_functions()); -} -- cgit v1.2.3 From b50d8f8c4a2cdc3aaa436ea183324eca45a3aa97 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 3 Aug 2016 16:50:13 -0700 Subject: Extended a regression test to validate that we basic fp16 support works with cuda 7.0 --- unsupported/test/cxx11_tensor_of_float16_cuda.cu | 40 ++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu index b6df5a4d2..9a1de6c34 100644 --- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu @@ -19,6 +19,44 @@ using Eigen::Tensor; +template +void test_cuda_numext() { + Eigen::CudaStreamDevice stream; + Eigen::GpuDevice gpu_device(&stream); + int num_elem = 101; + + float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float)); + bool* d_res_half = (bool*)gpu_device.allocate(num_elem * sizeof(bool)); + bool* d_res_float = (bool*)gpu_device.allocate(num_elem * sizeof(bool)); + + Eigen::TensorMap, Eigen::Aligned> 
gpu_float( + d_float, num_elem); + Eigen::TensorMap, Eigen::Aligned> gpu_res_half( + d_res_half, num_elem); + Eigen::TensorMap, Eigen::Aligned> gpu_res_float( + d_res_float, num_elem); + + gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f); + gpu_res_float.device(gpu_device) = gpu_float.unaryExpr(Eigen::internal::scalar_isnan_op()); + gpu_res_half.device(gpu_device) = gpu_float.cast().unaryExpr(Eigen::internal::scalar_isnan_op()); + + Tensor half_prec(num_elem); + Tensor full_prec(num_elem); + gpu_device.memcpyDeviceToHost(half_prec.data(), d_res_half, num_elem*sizeof(bool)); + gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(bool)); + gpu_device.synchronize(); + + for (int i = 0; i < num_elem; ++i) { + std::cout << "Checking unary " << i << std::endl; + VERIFY_IS_EQUAL(full_prec(i), half_prec(i)); + } + + gpu_device.deallocate(d_float); + gpu_device.deallocate(d_res_half); + gpu_device.deallocate(d_res_float); +} + + #ifdef EIGEN_HAS_CUDA_FP16 template @@ -415,6 +453,8 @@ void test_cuda_forced_evals() { void test_cxx11_tensor_of_float16_cuda() { + CALL_SUBTEST_1(test_cuda_numext()); + #ifdef EIGEN_HAS_CUDA_FP16 CALL_SUBTEST_1(test_cuda_conversion()); CALL_SUBTEST_1(test_cuda_unary()); -- cgit v1.2.3 From 9506343349556cd74dd359a9219bfcb295716930 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 4 Aug 2016 17:25:53 -0700 Subject: Fixed the isnan, isfinite and isinf operations on GPU --- Eigen/src/Core/arch/CUDA/Half.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index 6ae2c53c5..99846cb0b 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -544,4 +544,31 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::half __ldg(const Eigen::half* ptr) } #endif + +#if defined(__CUDA_ARCH__) +namespace Eigen { +namespace numext { + +template<> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +bool 
(isnan)(const Eigen::half& h) { + return (half_impl::isnan)(h); +} + +template<> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +bool (isinf)(const Eigen::half& h) { + return (half_impl::isinf)(h); +} + +template<> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE +bool (isfinite)(const Eigen::half& h) { + return (half_impl::isfinite)(h); +} + +} // namespace Eigen +} // namespace numext +#endif + #endif // EIGEN_HALF_CUDA_H -- cgit v1.2.3 From 5eea1c7f977253bfc1d1540e1db0fb616e41b61c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 4 Aug 2016 17:34:13 -0700 Subject: Fixed cut and paste bug in debud message --- unsupported/test/cxx11_tensor_of_float16_cuda.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/test/cxx11_tensor_of_float16_cuda.cu b/unsupported/test/cxx11_tensor_of_float16_cuda.cu index 9a1de6c34..2f55f9361 100644 --- a/unsupported/test/cxx11_tensor_of_float16_cuda.cu +++ b/unsupported/test/cxx11_tensor_of_float16_cuda.cu @@ -47,7 +47,7 @@ void test_cuda_numext() { gpu_device.synchronize(); for (int i = 0; i < num_elem; ++i) { - std::cout << "Checking unary " << i << std::endl; + std::cout << "Checking numext " << i << std::endl; VERIFY_IS_EQUAL(full_prec(i), half_prec(i)); } -- cgit v1.2.3 From fe778427f2aab125a1d1906192bb4d8a73a57a12 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 4 Aug 2016 18:32:26 -0700 Subject: Fixed the constructors of the new half_base class. 
--- Eigen/src/Core/arch/CUDA/Half.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index 99846cb0b..c2ff207ba 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -65,8 +65,8 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __half float_to_half_rtne(float ff); EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float half_to_float(__half h); struct half_base : public __half { - explicit EIGEN_DEVICE_FUNC half_base(unsigned short raw) : __half(raw) {} EIGEN_DEVICE_FUNC half_base() {} + EIGEN_DEVICE_FUNC half_base(const half_base& h) : __half(h) {} EIGEN_DEVICE_FUNC half_base(const __half& h) : __half(h) {} }; -- cgit v1.2.3 From fe4b927e9c8e796a07c5864e58630e888979519e Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Fri, 5 Aug 2016 15:21:14 +0200 Subject: Add aliases Eigen_*_DIR to Eigen3_*_DIR This is to make configuring work again after project was renamed from Eigen to Eigen3 --- CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index b4d28125a..76ec09ea0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,6 +8,11 @@ if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. You may need to remove CMakeCache.txt. 
") endif() +# Alias Eigen_*_DIR to Eigen3_*_DIR: + +set(Eigen_SOURCE_DIR ${Eigen3_SOURCE_DIR}) +set(Eigen_BINARY_DIR ${Eigen3_BINARY_DIR}) + # guard against bad build-type strings if (NOT CMAKE_BUILD_TYPE) -- cgit v1.2.3 -- cgit v1.2.3 From 1031223c095c7685347b4930e81b390ee88c35e0 Mon Sep 17 00:00:00 2001 From: Ziming Dong Date: Sat, 6 Aug 2016 19:48:50 +0800 Subject: fix tanh inconsistent --- Eigen/src/Core/functors/UnaryFunctors.h | 117 ++++++++++++++++++++++++-------- 1 file changed, 89 insertions(+), 28 deletions(-) diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 04208c9fe..e2f3d869f 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -491,19 +491,62 @@ struct functor_traits > }; }; - /** \internal * \brief Template functor to compute the tanh of a scalar * \sa class CwiseUnaryOp, ArrayBase::tanh() */ -template struct scalar_tanh_op { +template +struct scalar_tanh_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_tanh_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { return numext::tanh(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator()(const Scalar& a) const { + /** \internal \returns the hyperbolic tan of \a a (coeff-wise) + Doesn't do anything fancy, just a 13/6-degree rational interpolant + which + is accurate up to a couple of ulp in the range [-9, 9], outside of + which + the fl(tanh(x)) = +/-1. */ + + // Clamp the inputs to the range [-9, 9] since anything outside + // this range is +/-1.0f in single-precision. + const Scalar plus_9 = static_cast(9.0); + const Scalar minus_9 = static_cast(-9.0); + const Scalar x = numext::maxi(minus_9, numext::mini(plus_9, a)); + // Scalarhe monomial coefficients of the numerator polynomial (odd). 
+ const Scalar alpha_1 = static_cast(4.89352455891786e-03); + const Scalar alpha_3 = static_cast(6.37261928875436e-04); + const Scalar alpha_5 = static_cast(1.48572235717979e-05); + const Scalar alpha_7 = static_cast(5.12229709037114e-08); + const Scalar alpha_9 = static_cast(-8.60467152213735e-11); + const Scalar alpha_11 = static_cast(2.00018790482477e-13); + const Scalar alpha_13 = static_cast(-2.76076847742355e-16); + // Scalarhe monomial coefficients of the denominator polynomial (even). + const Scalar beta_0 = static_cast(4.89352518554385e-03); + const Scalar beta_2 = static_cast(2.26843463243900e-03); + const Scalar beta_4 = static_cast(1.18534705686654e-04); + const Scalar beta_6 = static_cast(1.19825839466702e-06); + // Since the polynomials are odd/even, we need x^2. + const Scalar x2 = x * x; + // Evaluate the numerator polynomial p. + Scalar p = x2 * alpha_13 + alpha_11; + p = x2 * p + alpha_9; + p = x2 * p + alpha_7; + p = x2 * p + alpha_5; + p = x2 * p + alpha_3; + p = x2 * p + alpha_1; + p = x * p; + // Evaluate the denominator polynomial p. + Scalar q = x2 * beta_6 + beta_4; + q = x2 * q + beta_2; + q = x2 * q + beta_0; + // Divide the numerator by the denominator. + return p / q; + } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& _x) const { /** \internal \returns the hyperbolic tan of \a a (coeff-wise) Doesn't do anything fancy, just a 13/6-degree rational interpolant which - is accurate up to a couple of ulp in the range [-9, 9], outside of which the + is accurate up to a couple of ulp in the range [-9, 9], outside of which + the fl(tanh(x)) = +/-1. */ // Clamp the inputs to the range [-9, 9] since anything outside @@ -511,7 +554,7 @@ template struct scalar_tanh_op { const Packet plus_9 = pset1(9.0); const Packet minus_9 = pset1(-9.0); const Packet x = pmax(minus_9, pmin(plus_9, _x)); - + // The monomial coefficients of the numerator polynomial (odd). 
const Packet alpha_1 = pset1(4.89352455891786e-03); const Packet alpha_3 = pset1(6.37261928875436e-04); @@ -520,17 +563,17 @@ template struct scalar_tanh_op { const Packet alpha_9 = pset1(-8.60467152213735e-11); const Packet alpha_11 = pset1(2.00018790482477e-13); const Packet alpha_13 = pset1(-2.76076847742355e-16); - + // The monomial coefficients of the denominator polynomial (even). const Packet beta_0 = pset1(4.89352518554385e-03); const Packet beta_2 = pset1(2.26843463243900e-03); const Packet beta_4 = pset1(1.18534705686654e-04); const Packet beta_6 = pset1(1.19825839466702e-06); - + // Since the polynomials are odd/even, we need x^2. const Packet x2 = pmul(x, x); - - // Evaluate the numerator polynomial p. + + // Evaluate the numerator polynomial p. Packet p = pmadd(x2, alpha_13, alpha_11); p = pmadd(x2, p, alpha_9); p = pmadd(x2, p, alpha_7); @@ -538,38 +581,56 @@ template struct scalar_tanh_op { p = pmadd(x2, p, alpha_3); p = pmadd(x2, p, alpha_1); p = pmul(x, p); - + // Evaluate the denominator polynomial p. Packet q = pmadd(x2, beta_6, beta_4); q = pmadd(x2, q, beta_2); q = pmadd(x2, q, beta_0); - + // Divide the numerator by the denominator. return pdiv(p, q); } }; -template -struct functor_traits > -{ +template <> +struct scalar_tanh_op > { + EIGEN_DEVICE_FUNC inline const std::complex operator()( + const std::complex& a) const { + return numext::tanh(a); + } +}; +template <> +struct scalar_tanh_op > { + EIGEN_DEVICE_FUNC inline const std::complex operator()( + const std::complex& a) const { + return numext::tanh(a); + } +}; +template +struct functor_traits > { enum { PacketAccess = packet_traits::HasTanh, - Cost = - (PacketAccess - // The following numbers are based on the AVX implementation, + Cost = (PacketAccess && (!is_same >::value) && + (!is_same >::value) +// The following numbers are based on the AVX implementation, #ifdef EIGEN_VECTORIZE_FMA - // Haswell can issue 2 add/mul/madd per cycle. - // 9 pmadd, 2 pmul, 1 div, 2 other - ? 
(2 * NumTraits::AddCost + 6 * NumTraits::MulCost + - NumTraits::template Div::HasDiv>::Cost) + // Haswell can issue 2 add/mul/madd per cycle. + // 9 pmadd, 2 pmul, 1 div, 2 other + ? (2 * NumTraits::AddCost + + 6 * NumTraits::MulCost + + NumTraits::template Div< + packet_traits::HasDiv>::Cost) #else - ? (11 * NumTraits::AddCost + - 11 * NumTraits::MulCost + - NumTraits::template Div::HasDiv>::Cost) + ? (11 * NumTraits::AddCost + + 11 * NumTraits::MulCost + + NumTraits::template Div< + packet_traits::HasDiv>::Cost) #endif - // This number assumes a naive implementation of tanh - : (6 * NumTraits::AddCost + 3 * NumTraits::MulCost + - 2 * NumTraits::template Div::HasDiv>::Cost + - functor_traits >::Cost)) + // This number assumes a naive implementation of tanh + : (6 * NumTraits::AddCost + + 3 * NumTraits::MulCost + + 2 * NumTraits::template Div< + packet_traits::HasDiv>::Cost + + functor_traits >::Cost)) }; }; -- cgit v1.2.3 From 3e4a33d4bac400f857fc165c9b119901c1c7f5e5 Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Mon, 8 Aug 2016 17:26:48 +0200 Subject: bug #1272: Let CommaInitializer work for more border cases (enhances fix of bug #1242). The unit test tests all combinations of 2x2 block-sizes from 0 to 3. 
--- Eigen/src/Core/CommaInitializer.h | 17 ++------- test/commainitializer.cpp | 78 ++++++++++++++++++++++++++++----------- 2 files changed, 61 insertions(+), 34 deletions(-) diff --git a/Eigen/src/Core/CommaInitializer.h b/Eigen/src/Core/CommaInitializer.h index 787743b8f..b39a661d0 100644 --- a/Eigen/src/Core/CommaInitializer.h +++ b/Eigen/src/Core/CommaInitializer.h @@ -80,12 +80,7 @@ struct CommaInitializer EIGEN_DEVICE_FUNC CommaInitializer& operator,(const DenseBase& other) { - if(other.rows()==0) - { - m_col += other.cols(); - return *this; - } - if (m_col==m_xpr.cols()) + if (m_col==m_xpr.cols() && (other.cols()!=0 || other.rows()!=m_currentBlockRows)) { m_row+=m_currentBlockRows; m_col = 0; @@ -93,15 +88,11 @@ struct CommaInitializer eigen_assert(m_row+m_currentBlockRows<=m_xpr.rows() && "Too many rows passed to comma initializer (operator<<)"); } - eigen_assert((m_col - (m_row, m_col) = other; - else - m_xpr.block(m_row, m_col, other.rows(), other.cols()) = other; + m_xpr.template block + (m_row, m_col, other.rows(), other.cols()) = other; m_col += other.cols(); return *this; } diff --git a/test/commainitializer.cpp b/test/commainitializer.cpp index 86bdb040e..5ece4f6cc 100644 --- a/test/commainitializer.cpp +++ b/test/commainitializer.cpp @@ -9,6 +9,61 @@ #include "main.h" + +template +void test_blocks() +{ + Matrix m_fixed; + MatrixXi m_dynamic(M1+M2, N1+N2); + + Matrix mat11; mat11.setRandom(); + Matrix mat12; mat12.setRandom(); + Matrix mat21; mat21.setRandom(); + Matrix mat22; mat22.setRandom(); + + MatrixXi matx11 = mat11, matx12 = mat12, matx21 = mat21, matx22 = mat22; + + // The only remaining border case is M1==M2>0 && N1==N2==0. 
+ // In that case it is not possible to decide (without backtracking) if a block starts a new row or does not + if(M1 != M2 || M1 == 0 || N1>0 || N2>0) + { + VERIFY_IS_EQUAL((m_fixed << mat11, mat12, mat21, matx22).finished(), (m_dynamic << mat11, matx12, mat21, matx22).finished()); + VERIFY_IS_EQUAL((m_fixed << mat12, mat11, matx21, mat22).finished(), (m_dynamic << mat12, matx11, matx21, mat22).finished()); + } + + if(N1 > 0) + { + VERIFY_RAISES_ASSERT((m_fixed << mat11, mat12, mat11, mat21, mat22)); + VERIFY_RAISES_ASSERT((m_fixed << mat11, mat12, mat21, mat21, mat22)); + } + else if(N2 > 0 || M1 != M2) // border case if both sublocks have zero columns and same number of rows + { + // allow insertion of zero-column blocks: + VERIFY_IS_EQUAL((m_fixed << mat11, mat12, mat11, mat11, mat21, mat21, mat22).finished(), (m_dynamic << mat12, mat22).finished()); + } + if(M1 != M2) + { + VERIFY_RAISES_ASSERT((m_fixed << mat11, mat21, mat12, mat22)); + } +} + + +template +struct test_block_recursion +{ + static void run() + { + test_blocks<(N>>6)&3, (N>>4)&3, (N>>2)&3, N & 3>(); + test_block_recursion::run(); + } +}; + +template<> +struct test_block_recursion<-1> +{ + static void run() { } +}; + void test_commainitializer() { Matrix3d m3; @@ -45,25 +100,6 @@ void test_commainitializer() VERIFY_IS_APPROX(m3, ref); - // Check with empty matrices (bug #1242) - { - int const M = 0; - int const N1 = 2; - int const N2 = 1; - - { - Matrix A1; - Matrix A2; - Matrix B; - B << A1, A2; - } - { - Matrix A1; - Matrix A2; - Matrix B; - B << A1, - A2; - } - } - + // recursively test all block-sizes from 0 to 3: + test_block_recursion<(1<<8) - 1>(); } -- cgit v1.2.3 From aee693ac520bc2b3980893ee7b323d8c07e0f133 Mon Sep 17 00:00:00 2001 From: Igor Babuschkin Date: Mon, 8 Aug 2016 20:24:59 +0100 Subject: Add log1p support for CUDA and half floats --- Eigen/src/Core/arch/CUDA/Half.h | 3 +++ Eigen/src/Core/arch/CUDA/MathFunctions.h | 12 ++++++++++++ Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 
16 +++++++++++++++- test/half_float.cpp | 5 +++++ test/packetmath.cpp | 1 + 5 files changed, 36 insertions(+), 1 deletion(-) diff --git a/Eigen/src/Core/arch/CUDA/Half.h b/Eigen/src/Core/arch/CUDA/Half.h index c2ff207ba..4d91420d0 100644 --- a/Eigen/src/Core/arch/CUDA/Half.h +++ b/Eigen/src/Core/arch/CUDA/Half.h @@ -391,6 +391,9 @@ EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half exp(const half& a) { EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log(const half& a) { return half(::logf(float(a))); } +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log1p(const half& a) { + return half(::log1pf(float(a))); +} EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC half log10(const half& a) { return half(::log10f(float(a))); } diff --git a/Eigen/src/Core/arch/CUDA/MathFunctions.h b/Eigen/src/Core/arch/CUDA/MathFunctions.h index 5b5bf4c94..0348b41db 100644 --- a/Eigen/src/Core/arch/CUDA/MathFunctions.h +++ b/Eigen/src/Core/arch/CUDA/MathFunctions.h @@ -31,6 +31,18 @@ double2 plog(const double2& a) return make_double2(log(a.x), log(a.y)); } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +float4 plog1p(const float4& a) +{ + return make_float4(log1pf(a.x), log1pf(a.y), log1pf(a.z), log1pf(a.w)); +} + +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +double2 plog1p(const double2& a) +{ + return make_double2(log1p(a.x), log1p(a.y)); +} + template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pexp(const float4& a) { diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index 0bf891d24..a757ea45b 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -34,7 +34,8 @@ template<> struct packet_traits : default_packet_traits HasSqrt = 1, HasRsqrt = 1, HasExp = 1, - HasLog = 1 + HasLog = 1, + HasLog1p = 1 }; }; @@ -273,6 +274,11 @@ half2 plog(const half2& a) { return h2log(a); } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +half2 plog1p(const half2& a) { + return h2log1p(a); +} + template<> EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE half2 pexp(const half2& a) { return h2exp(a); @@ -298,6 +304,14 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog(const half2& return __floats2half2_rn(r1, r2); } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog1p(const half2& a) { + float a1 = __low2float(a); + float a2 = __high2float(a); + float r1 = log1pf(a1); + float r2 = log1pf(a2); + return __floats2half2_rn(r1, r2); +} + template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexp(const half2& a) { float a1 = __low2float(a); float a2 = __high2float(a); diff --git a/test/half_float.cpp b/test/half_float.cpp index 6f299a346..f8d438e2f 100644 --- a/test/half_float.cpp +++ b/test/half_float.cpp @@ -189,6 +189,11 @@ void test_basic_functions() VERIFY_IS_EQUAL(float(log(half(1.0f))), 0.0f); VERIFY_IS_APPROX(float(numext::log(half(10.0f))), 2.30273f); VERIFY_IS_APPROX(float(log(half(10.0f))), 2.30273f); + + VERIFY_IS_EQUAL(float(numext::log1p(half(0.0f))), 0.0f); + VERIFY_IS_EQUAL(float(log1p(half(0.0f))), 0.0f); + VERIFY_IS_APPROX(float(numext::log1p(half(10.0f))), 2.3978953f); + VERIFY_IS_APPROX(float(log1p(half(10.0f))), 2.3978953f); } void test_trigonometric_functions() diff --git a/test/packetmath.cpp b/test/packetmath.cpp index ea1786be2..77cbf3e5b 100644 --- a/test/packetmath.cpp +++ b/test/packetmath.cpp @@ -402,6 +402,7 @@ template void packetmath_real() data1[internal::random(0, PacketSize)] = 0; CHECK_CWISE1_IF(PacketTraits::HasSqrt, std::sqrt, internal::psqrt); CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog); + CHECK_CWISE1_IF(PacketTraits::HasLog1p, std::log1p, internal::plog1p); #if EIGEN_HAS_C99_MATH && (__cplusplus > 199711L) CHECK_CWISE1_IF(internal::packet_traits::HasLGamma, std::lgamma, internal::plgamma); CHECK_CWISE1_IF(internal::packet_traits::HasErf, std::erf, internal::perf); -- cgit v1.2.3 From 64e68cbe87d18f40978fba31797fc07c2fbb9a1f Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 8 Aug 2016 19:29:59 -0700 Subject: Don't 
attempt to optimize partial reductions when the optimized implementation doesn't buy anything. --- .../Eigen/CXX11/src/Tensor/TensorReduction.h | 24 ++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index 0e1576954..9df697e4c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -505,12 +505,20 @@ struct TensorEvaluator, Device> (reducing_inner_dims || ReducingInnerMostDims)) { const Index num_values_to_reduce = internal::array_prod(m_reducedDims); const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); - if (!data && num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve) { + if (!data && num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 128) { data = static_cast(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); m_result = data; } Op reducer(m_reducer); - return internal::InnerReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve) || (m_result != NULL); + if (internal::InnerReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { + if (m_result) { + m_device.deallocate(m_result); + m_result = NULL; + } + return true; + } else { + return (m_result != NULL); + } } bool preserving_inner_dims = true; @@ -525,12 +533,20 @@ struct TensorEvaluator, Device> preserving_inner_dims) { const Index num_values_to_reduce = internal::array_prod(m_reducedDims); const Index num_coeffs_to_preserve = internal::array_prod(m_dimensions); - if (!data && num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve) { + if (!data && num_coeffs_to_preserve < 1024 && num_values_to_reduce > num_coeffs_to_preserve && num_values_to_reduce > 32) { data = 
static_cast(m_device.allocate(sizeof(CoeffReturnType) * num_coeffs_to_preserve)); m_result = data; } Op reducer(m_reducer); - return internal::OuterReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve) || (m_result != NULL); + if (internal::OuterReducer::run(*this, reducer, m_device, data, num_values_to_reduce, num_coeffs_to_preserve)) { + if (m_result) { + m_device.deallocate(m_result); + m_result = NULL; + } + return true; + } else { + return (m_result != NULL); + } } } return true; -- cgit v1.2.3 From e3a8dfb02fff69dc455ba724b1f42a697ea4308c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 11 Aug 2016 15:24:06 -0700 Subject: std::erfcf doesn't exist: use numext::erfc instead --- .../Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h b/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h index 0ef440495..ec4fa8448 100644 --- a/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h +++ b/unsupported/Eigen/src/SpecialFunctions/arch/CUDA/CudaSpecialFunctions.h @@ -90,8 +90,8 @@ double2 perf(const double2& a) template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 perfc(const float4& a) { - using std::erfcf; - return make_float4(erfcf(a.x), erfcf(a.y), erfcf(a.z), erfcf(a.w)); + using numext::erfc; + return make_float4(erfc(a.x), erfc(a.y), erfc(a.z), erfc(a.w)); } template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -- cgit v1.2.3 From c83b754ee0c65881fe8e42b2b901e23fe6adbb1c Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Fri, 12 Aug 2016 15:15:34 +0200 Subject: bug #1272: Disable assertion when total number of columns is zero. 
Also moved assertion to finished() method and adapted unit-test --- Eigen/src/Core/CommaInitializer.h | 11 +++++++---- test/commainitializer.cpp | 9 +++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/Eigen/src/Core/CommaInitializer.h b/Eigen/src/Core/CommaInitializer.h index b39a661d0..d218e9814 100644 --- a/Eigen/src/Core/CommaInitializer.h +++ b/Eigen/src/Core/CommaInitializer.h @@ -103,9 +103,7 @@ struct CommaInitializer EIGEN_EXCEPTION_SPEC(Eigen::eigen_assert_exception) #endif { - eigen_assert((m_row+m_currentBlockRows) == m_xpr.rows() - && m_col == m_xpr.cols() - && "Too few coefficients passed to comma initializer (operator<<)"); + finished(); } /** \returns the built matrix once all its coefficients have been set. @@ -116,7 +114,12 @@ struct CommaInitializer * \endcode */ EIGEN_DEVICE_FUNC - inline XprType& finished() { return m_xpr; } + inline XprType& finished() { + eigen_assert(((m_row+m_currentBlockRows) == m_xpr.rows() || m_xpr.cols() == 0) + && m_col == m_xpr.cols() + && "Too few coefficients passed to comma initializer (operator<<)"); + return m_xpr; + } XprType& m_xpr; // target expression Index m_row; // current row id diff --git a/test/commainitializer.cpp b/test/commainitializer.cpp index 5ece4f6cc..9844adbd2 100644 --- a/test/commainitializer.cpp +++ b/test/commainitializer.cpp @@ -23,11 +23,12 @@ void test_blocks() MatrixXi matx11 = mat11, matx12 = mat12, matx21 = mat21, matx22 = mat22; - // The only remaining border case is M1==M2>0 && N1==N2==0. 
- // In that case it is not possible to decide (without backtracking) if a block starts a new row or does not - if(M1 != M2 || M1 == 0 || N1>0 || N2>0) { VERIFY_IS_EQUAL((m_fixed << mat11, mat12, mat21, matx22).finished(), (m_dynamic << mat11, matx12, mat21, matx22).finished()); + VERIFY_IS_EQUAL((m_fixed.template topLeftCorner()), mat11); + VERIFY_IS_EQUAL((m_fixed.template topRightCorner()), mat12); + VERIFY_IS_EQUAL((m_fixed.template bottomLeftCorner()), mat21); + VERIFY_IS_EQUAL((m_fixed.template bottomRightCorner()), mat22); VERIFY_IS_EQUAL((m_fixed << mat12, mat11, matx21, mat22).finished(), (m_dynamic << mat12, matx11, matx21, mat22).finished()); } @@ -36,7 +37,7 @@ void test_blocks() VERIFY_RAISES_ASSERT((m_fixed << mat11, mat12, mat11, mat21, mat22)); VERIFY_RAISES_ASSERT((m_fixed << mat11, mat12, mat21, mat21, mat22)); } - else if(N2 > 0 || M1 != M2) // border case if both sublocks have zero columns and same number of rows + else { // allow insertion of zero-column blocks: VERIFY_IS_EQUAL((m_fixed << mat11, mat12, mat11, mat11, mat21, mat21, mat22).finished(), (m_dynamic << mat12, mat22).finished()); -- cgit v1.2.3 From 9636a8ed437c24e6ddbdc15a45aed284c4027636 Mon Sep 17 00:00:00 2001 From: Christoph Hertzberg Date: Fri, 12 Aug 2016 15:34:21 +0200 Subject: bug #1273: Add parentheses when redefining eigen_assert --- doc/TopicAssertions.dox | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/TopicAssertions.dox b/doc/TopicAssertions.dox index 4ead40174..c8b4d84f2 100644 --- a/doc/TopicAssertions.dox +++ b/doc/TopicAssertions.dox @@ -16,7 +16,7 @@ Both eigen_assert and eigen_plain_assert are defined in Macros.h. 
Defining eigen #include #undef eigen_assert #define eigen_assert(x) \ - if (!x) { throw (std::runtime_error("Put your message here")); } + if (!(x)) { throw (std::runtime_error("Put your message here")); } \endcode \subsection DisableAssert Disabling assertions -- cgit v1.2.3 From fe73648c989222eda3f4bf2c7589dba16c098594 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 12 Aug 2016 10:00:43 -0700 Subject: Fixed a bug in the documentation. --- unsupported/Eigen/CXX11/src/Tensor/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/README.md b/unsupported/Eigen/CXX11/src/Tensor/README.md index fda33edda..02146527b 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/README.md +++ b/unsupported/Eigen/CXX11/src/Tensor/README.md @@ -1102,7 +1102,7 @@ Example: Reduction along two dimensions. As a special case, if you pass no parameter to a reduction operation the original tensor is reduced along *all* its dimensions. The result is a -one-dimension tensor with a single value. +scalar, represented as a zero-dimension tensor. Eigen::Tensor a(2, 3, 4); a.setValues({{{0.0f, 1.0f, 2.0f, 3.0f}, @@ -1112,7 +1112,7 @@ one-dimension tensor with a single value. {19.0f, 18.0f, 17.0f, 16.0f}, {20.0f, 21.0f, 22.0f, 23.0f}}}); // Reduce along all dimensions using the sum() operator. - Eigen::Tensor b = a.sum(); + Eigen::Tensor b = a.sum(); cout << "b" << endl << b << endl << endl; => b -- cgit v1.2.3 -- cgit v1.2.3 From 34ae80179ac67bac3dd8fcfead6544f085ddcc8c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 15 Aug 2016 10:29:14 -0700 Subject: Use array_prod instead of calling TotalSize since TotalSize is only available on DSize. 
--- unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index 5d0548b84..c23ecdbc4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -102,7 +102,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) { - const Index numValues = m_impl.dimensions().TotalSize(); + const Index numValues = internal::array_prod(m_impl.dimensions()); m_buffer = (CoeffReturnType*)m_device.allocate(numValues * sizeof(CoeffReturnType)); // Should initialize the memory in case we're dealing with non POD types. if (NumTraits::RequireInitialization) { -- cgit v1.2.3 From 59bacfe5201b54a6303b79bb538671d04f91dbce Mon Sep 17 00:00:00 2001 From: Igor Babuschkin Date: Mon, 15 Aug 2016 23:38:05 +0100 Subject: Fix compilation on CUDA 8 by removing call to h2log1p --- Eigen/src/Core/arch/CUDA/PacketMathHalf.h | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h index a757ea45b..84ddcea2a 100644 --- a/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +++ b/Eigen/src/Core/arch/CUDA/PacketMathHalf.h @@ -267,6 +267,14 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half predux_mul(c #endif } +template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog1p(const half2& a) { + float a1 = __low2float(a); + float a2 = __high2float(a); + float r1 = log1pf(a1); + float r2 = log1pf(a2); + return __floats2half2_rn(r1, r2); +} + #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 80000 && defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 530 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE @@ -274,11 
+282,6 @@ half2 plog(const half2& a) { return h2log(a); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE -half2 plog1p(const half2& a) { - return h2log1p(a); -} - template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexp(const half2& a) { return h2exp(a); @@ -304,14 +307,6 @@ template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog(const half2& return __floats2half2_rn(r1, r2); } -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 plog1p(const half2& a) { - float a1 = __low2float(a); - float a2 = __high2float(a); - float r1 = log1pf(a1); - float r2 = log1pf(a2); - return __floats2half2_rn(r1, r2); -} - template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pexp(const half2& a) { float a1 = __low2float(a); float a2 = __high2float(a); -- cgit v1.2.3 From cb81975714a96ecb2faf33ca242feeee3543b1db Mon Sep 17 00:00:00 2001 From: Luke Iwanski Date: Mon, 19 Sep 2016 12:44:13 +0100 Subject: Partial OpenCL support via SYCL compatible with ComputeCpp CE. --- CMakeLists.txt | 7 + Eigen/Core | 35 +- bench/tensors/README | 8 +- bench/tensors/tensor_benchmarks_sycl.cc | 37 ++ cmake/EigenTesting.cmake | 135 ++++++ cmake/FindComputeCpp.cmake | 228 ++++++++++ unsupported/Eigen/CXX11/Tensor | 2 + unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h | 5 + unsupported/Eigen/CXX11/src/Tensor/TensorBase.h | 4 +- .../Eigen/CXX11/src/Tensor/TensorBroadcasting.h | 7 +- .../Eigen/CXX11/src/Tensor/TensorDeviceSycl.h | 122 +++++ unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h | 56 ++- .../Eigen/CXX11/src/Tensor/TensorEvaluator.h | 58 ++- .../Eigen/CXX11/src/Tensor/TensorExecutor.h | 14 + .../Eigen/CXX11/src/Tensor/TensorFixedSize.h | 4 +- .../Eigen/CXX11/src/Tensor/TensorForcedEval.h | 49 +- .../CXX11/src/Tensor/TensorForwardDeclarations.h | 19 +- unsupported/Eigen/CXX11/src/Tensor/TensorMap.h | 20 +- unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h | 62 +++ .../Tensor/TensorSyclConvertToDeviceExpression.h | 238 ++++++++++ .../CXX11/src/Tensor/TensorSyclExprConstructor.h | 495 
+++++++++++++++++++++ .../CXX11/src/Tensor/TensorSyclExtractAccessor.h | 466 +++++++++++++++++++ .../CXX11/src/Tensor/TensorSyclExtractFunctors.h | 313 +++++++++++++ .../Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h | 188 ++++++++ .../Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h | 151 +++++++ .../CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h | 293 ++++++++++++ unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h | 84 ++++ .../Eigen/CXX11/src/Tensor/TensorSyclTuple.h | 264 +++++++++++ unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h | 11 +- unsupported/test/CMakeLists.txt | 7 + unsupported/test/cxx11_tensor_sycl.cpp | 157 +++++++ unsupported/test/cxx11_tensor_sycl_broadcast.cpp | 76 ++++ unsupported/test/cxx11_tensor_sycl_device.cpp | 37 ++ unsupported/test/cxx11_tensor_sycl_forced_eval.cpp | 64 +++ 34 files changed, 3652 insertions(+), 64 deletions(-) create mode 100644 bench/tensors/tensor_benchmarks_sycl.cc create mode 100644 cmake/FindComputeCpp.cmake create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h create mode 100644 unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h create mode 100644 unsupported/test/cxx11_tensor_sycl.cpp create mode 100644 unsupported/test/cxx11_tensor_sycl_broadcast.cpp create mode 100644 
unsupported/test/cxx11_tensor_sycl_device.cpp create mode 100644 unsupported/test/cxx11_tensor_sycl_forced_eval.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 76ec09ea0..812997a29 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -429,6 +429,13 @@ else() add_subdirectory(lapack EXCLUDE_FROM_ALL) endif() +# add SYCL +option(EIGEN_TEST_SYCL "Add Sycl support." OFF) +if(EIGEN_TEST_SYCL) + set (CMAKE_MODULE_PATH "${CMAKE_ROOT}/Modules" "cmake/Modules/" "${CMAKE_MODULE_PATH}") + include(FindComputeCpp) +endif() + add_subdirectory(unsupported) add_subdirectory(demos EXCLUDE_FROM_ALL) diff --git a/Eigen/Core b/Eigen/Core index 946ed0677..3d2152acf 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -51,7 +51,40 @@ #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC; #endif -#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) +#ifdef EIGEN_USE_SYCL +#undef min +#undef max +#undef isnan +#undef isinf +#undef isfinite +#include +#endif + +// We need these predefines to determine if asserts need to be disabled for the device compiler +#if defined(__SYCL_DEVICE_ONLY__) + // Do not try asserts on SYCL! + #ifndef EIGEN_NO_DEBUG + #define EIGEN_NO_DEBUG + #endif + + #ifdef EIGEN_INTERNAL_DEBUGGING + #undef EIGEN_INTERNAL_DEBUGGING + #endif + + // Do not try to vectorize on SYCL! 
+ #ifndef EIGEN_DONT_VECTORIZE + #define EIGEN_DONT_VECTORIZE + #endif + + #ifdef EIGEN_EXCEPTIONS + #undef EIGEN_EXCEPTIONS + #endif + + #define EIGEN_DEVICE_FUNC + +#endif + +#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL) #define EIGEN_EXCEPTIONS #endif diff --git a/bench/tensors/README b/bench/tensors/README index 803cb8ef8..3a5fdbe17 100644 --- a/bench/tensors/README +++ b/bench/tensors/README @@ -11,5 +11,11 @@ nvcc tensor_benchmarks_gpu.cu benchmark_main.cc -I ../../ -std=c++11 -O2 -DNDEBU We also provide a version of the generic GPU tensor benchmarks that uses half floats (aka fp16) instead of regular floats. To compile these benchmarks, simply call the command line below. You'll need a recent GPU that supports compute capability 5.3 or higher to run them and nvcc 7.5 or higher to compile the code. nvcc tensor_benchmarks_fp16_gpu.cu benchmark_main.cc -I ../../ -std=c++11 -O2 -DNDEBUG -use_fast_math -ftz=true -arch compute_53 -o benchmarks_fp16_gpu -last but not least, we also provide a suite of benchmarks to measure the scalability of the contraction code on CPU. To compile these benchmarks, call +last but not least, we also provide a suite of benchmarks to measure the scalability of the contraction code on CPU. To compile these benchmarks, call g++ contraction_benchmarks_cpu.cc benchmark_main.cc -I ../../ -std=c++11 -O3 -DNDEBUG -pthread -mavx -o benchmarks_cpu + +To compile the benchmark for SYCL, using ComputeCpp you currently need 2 passes (only for translation units containing device code): +1. The device compilation pass that generates the device code (SYCL kernels and referenced device functions) and glue code needed by the host compiler to reference the device code from host code. 
+{ComputeCpp_ROOT}/bin/compute++ -I ../../ -I {ComputeCpp_ROOT}/include/ -std=c++11 -mllvm -inline-threshold=1000 -Wno-ignored-attributes -sycl -intelspirmetadata -emit-llvm -no-serial-memop -sycl-compress-name -DBUILD_PLATFORM_SPIR -DNDEBUG -O3 -c tensor_benchmarks_sycl.cc +2. The host compilation pass that generates the final host binary. +clang++-3.7 -include tensor_benchmarks_sycl.sycl benchmark_main.cc tensor_benchmarks_sycl.cc -pthread -I ../../ -I {ComputeCpp_ROOT}/include/ -L {ComputeCpp_ROOT}/lib/ -lComputeCpp -lOpenCL -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++11 -o tensor_benchmark_sycl diff --git a/bench/tensors/tensor_benchmarks_sycl.cc b/bench/tensors/tensor_benchmarks_sycl.cc new file mode 100644 index 000000000..7eca4d966 --- /dev/null +++ b/bench/tensors/tensor_benchmarks_sycl.cc @@ -0,0 +1,37 @@ +#define EIGEN_USE_SYCL + +#include +#include + +#include "tensor_benchmarks.h" + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; +// Simple functions +template +cl::sycl::queue sycl_queue() { + return cl::sycl::queue(device_selector(), [=](cl::sycl::exception_list l) { + for (const auto& e : l) { + try { + std::rethrow_exception(e); + } catch (cl::sycl::exception e) { + std::cout << e.what() << std::endl; + } + } + }); +} + +#define BM_FuncGPU(FUNC) \ + static void BM_##FUNC(int iters, int N) { \ + StopBenchmarkTiming(); \ + cl::sycl::queue q = sycl_queue(); \ + Eigen::SyclDevice device(q); \ + BenchmarkSuite suite(device, N); \ + suite.FUNC(iters); \ + } \ + BENCHMARK_RANGE(BM_##FUNC, 10, 5000); + +BM_FuncGPU(broadcasting); +BM_FuncGPU(coeffWiseOp); diff --git a/cmake/EigenTesting.cmake b/cmake/EigenTesting.cmake index 57866d865..602ab5271 100644 --- a/cmake/EigenTesting.cmake +++ b/cmake/EigenTesting.cmake @@ -109,6 +109,103 @@ macro(ei_add_test_internal testname testname_with_suffix) endmacro(ei_add_test_internal) +# SYCL +macro(ei_add_test_internal_sycl testname testname_with_suffix) + include_directories( SYSTEM
${COMPUTECPP_PACKAGE_ROOT_DIR}/include) + set(targetname ${testname_with_suffix}) + + if(EIGEN_ADD_TEST_FILENAME_EXTENSION) + set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION}) + else() + set(filename ${testname}.cpp) + endif() + + set( include_file ${CMAKE_CURRENT_BINARY_DIR}/inc_${filename}) + set( bc_file ${CMAKE_CURRENT_BINARY_DIR}/${filename}) + set( host_file ${CMAKE_CURRENT_SOURCE_DIR}/${filename}) + + ADD_CUSTOM_COMMAND( + OUTPUT ${include_file} + COMMAND ${CMAKE_COMMAND} -E echo "\\#include \\\"${host_file}\\\"" > ${include_file} + COMMAND ${CMAKE_COMMAND} -E echo "\\#include \\\"${bc_file}.sycl\\\"" >> ${include_file} + DEPENDS ${filename} + COMMENT "Building ComputeCpp integration header file ${include_file}" + ) + # Add a custom target for the generated integration header + add_custom_target(${testname}_integration_header_woho DEPENDS ${include_file}) + + add_executable(${targetname} ${include_file}) + add_dependencies(${targetname} ${testname}_integration_header_woho) + add_sycl_to_target(${targetname} ${filename} ${CMAKE_CURRENT_BINARY_DIR}) + + if (targetname MATCHES "^eigen2_") + add_dependencies(eigen2_buildtests ${targetname}) + else() + add_dependencies(buildtests ${targetname}) + endif() + + if(EIGEN_NO_ASSERTION_CHECKING) + ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_NO_ASSERTION_CHECKING=1") + else(EIGEN_NO_ASSERTION_CHECKING) + if(EIGEN_DEBUG_ASSERTS) + ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_DEBUG_ASSERTS=1") + endif(EIGEN_DEBUG_ASSERTS) + endif(EIGEN_NO_ASSERTION_CHECKING) + + ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_MAX_SIZE=${EIGEN_TEST_MAX_SIZE}") + + ei_add_target_property(${targetname} COMPILE_FLAGS "-DEIGEN_TEST_FUNC=${testname}") + + if(MSVC AND NOT EIGEN_SPLIT_LARGE_TESTS) + ei_add_target_property(${targetname} COMPILE_FLAGS "/bigobj") + endif() + + # let the user pass flags. 
+ if(${ARGC} GREATER 2) + ei_add_target_property(${targetname} COMPILE_FLAGS "${ARGV2}") + endif(${ARGC} GREATER 2) + + if(EIGEN_TEST_CUSTOM_CXX_FLAGS) + ei_add_target_property(${targetname} COMPILE_FLAGS "${EIGEN_TEST_CUSTOM_CXX_FLAGS}") + endif() + + if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) + target_link_libraries(${targetname} ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}) + endif() + if(EXTERNAL_LIBS) + target_link_libraries(${targetname} ${EXTERNAL_LIBS}) + endif() + if(EIGEN_TEST_CUSTOM_LINKER_FLAGS) + target_link_libraries(${targetname} ${EIGEN_TEST_CUSTOM_LINKER_FLAGS}) + endif() + + if(${ARGC} GREATER 3) + set(libs_to_link ${ARGV3}) + # it could be that some cmake module provides a bad library string " " (just spaces), + # and that severely breaks target_link_libraries ("can't link to -l-lstdc++" errors). + # so we check for strings containing only spaces. + string(STRIP "${libs_to_link}" libs_to_link_stripped) + string(LENGTH "${libs_to_link_stripped}" libs_to_link_stripped_length) + if(${libs_to_link_stripped_length} GREATER 0) + # notice: no double quotes around ${libs_to_link} here. It may be a list. 
+ target_link_libraries(${targetname} ${libs_to_link}) + endif() + endif() + + add_test(${testname_with_suffix} "${targetname}") + + # Specify target and test labels according to EIGEN_CURRENT_SUBPROJECT + get_property(current_subproject GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT) + if ((current_subproject) AND (NOT (current_subproject STREQUAL ""))) + set_property(TARGET ${targetname} PROPERTY LABELS "Build${current_subproject}") + add_dependencies("Build${current_subproject}" ${targetname}) + set_property(TEST ${testname_with_suffix} PROPERTY LABELS "${current_subproject}") + endif() + + +endmacro(ei_add_test_internal_sycl) + + # Macro to add a test # # the unique mandatory parameter testname must correspond to a file @@ -185,6 +282,39 @@ macro(ei_add_test testname) endif(EIGEN_SPLIT_LARGE_TESTS AND suffixes) endmacro(ei_add_test) +macro(ei_add_test_sycl testname) + get_property(EIGEN_TESTS_LIST GLOBAL PROPERTY EIGEN_TESTS_LIST) + set(EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}${testname}\n") + set_property(GLOBAL PROPERTY EIGEN_TESTS_LIST "${EIGEN_TESTS_LIST}") + + if(EIGEN_ADD_TEST_FILENAME_EXTENSION) + set(filename ${testname}.${EIGEN_ADD_TEST_FILENAME_EXTENSION}) + else() + set(filename ${testname}.cpp) + endif() + + file(READ "${filename}" test_source) + set(parts 0) + string(REGEX MATCHALL "CALL_SUBTEST_[0-9]+|EIGEN_TEST_PART_[0-9]+|EIGEN_SUFFIXES(;[0-9]+)+" + occurences "${test_source}") + string(REGEX REPLACE "CALL_SUBTEST_|EIGEN_TEST_PART_|EIGEN_SUFFIXES" "" suffixes "${occurences}") + list(REMOVE_DUPLICATES suffixes) + if(EIGEN_SPLIT_LARGE_TESTS AND suffixes) + add_custom_target(${testname}) + foreach(suffix ${suffixes}) + ei_add_test_internal_sycl(${testname} ${testname}_${suffix} + "${ARGV1} -DEIGEN_TEST_PART_${suffix}=1" "${ARGV2}") + add_dependencies(${testname} ${testname}_${suffix}) + endforeach(suffix) + else(EIGEN_SPLIT_LARGE_TESTS AND suffixes) + set(symbols_to_enable_all_parts "") + foreach(suffix ${suffixes}) + set(symbols_to_enable_all_parts + 
"${symbols_to_enable_all_parts} -DEIGEN_TEST_PART_${suffix}=1") + endforeach(suffix) + ei_add_test_internal_sycl(${testname} ${testname} "${ARGV1} ${symbols_to_enable_all_parts}" "${ARGV2}") + endif(EIGEN_SPLIT_LARGE_TESTS AND suffixes) +endmacro(ei_add_test_sycl) # adds a failtest, i.e. a test that succeed if the program fails to compile # note that the test runner for these is CMake itself, when passed -DEIGEN_FAILTEST=ON @@ -330,6 +460,11 @@ macro(ei_testing_print_summary) message(STATUS "C++11: OFF") endif() + if(EIGEN_TEST_SYCL) + message(STATUS "SYCL: ON") + else() + message(STATUS "SYCL: OFF") + endif() if(EIGEN_TEST_CUDA) if(EIGEN_TEST_CUDA_CLANG) message(STATUS "CUDA: ON (using clang)") diff --git a/cmake/FindComputeCpp.cmake b/cmake/FindComputeCpp.cmake new file mode 100644 index 000000000..3aab5b833 --- /dev/null +++ b/cmake/FindComputeCpp.cmake @@ -0,0 +1,228 @@ +#.rst: +# FindComputeCpp +#--------------- + +######################### +# FindComputeCpp.cmake +######################### +# +# Tools for finding and building with ComputeCpp. +# + +# Require CMake version 3.2.2 or higher +cmake_minimum_required(VERSION 3.2.2) + +# Check that a supported host compiler can be found +if(CMAKE_COMPILER_IS_GNUCXX) + # Require at least gcc 4.8 + if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.8) + message(FATAL_ERROR + "host compiler - Not found! (gcc version must be at least 4.8)") + # Require the GCC dual ABI to be disabled for 5.1 or higher + elseif (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 5.1) + set(COMPUTECPP_DISABLE_GCC_DUAL_ABI "True") + message(STATUS + "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION} (note pre 5.1 gcc ABI enabled)") + else() + message(STATUS "host compiler - gcc ${CMAKE_CXX_COMPILER_VERSION}") + endif() +elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") + # Require at least clang 3.6 + if (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 3.6) + message(FATAL_ERROR + "host compiler - Not found! 
(clang version must be at least 3.6)") + else() + set(COMPUTECPP_DISABLE_GCC_DUAL_ABI "True") + message(STATUS "host compiler - clang ${CMAKE_CXX_COMPILER_VERSION}") + endif() +else() + message(WARNING + "host compiler - Not found! (ComputeCpp supports GCC and Clang, see readme)") +endif() + +set(COMPUTECPP_64_BIT_DEFAULT ON) +option(COMPUTECPP_64_BIT_CODE "Compile device code in 64 bit mode" + ${COMPUTECPP_64_BIT_DEFAULT}) +mark_as_advanced(COMPUTECPP_64_BIT_CODE) + +# Find OpenCL package +find_package(OpenCL REQUIRED) + +# Find ComputeCpp package +if(EXISTS ${COMPUTECPP_PACKAGE_ROOT_DIR}) + message(STATUS "ComputeCpp package - Found (${COMPUTECPP_PACKAGE_ROOT_DIR})") +else() + message(FATAL_ERROR "ComputeCpp package - Not found! (please set COMPUTECPP_PACKAGE_ROOT_DIR) (${COMPUTECPP_PACKAGE_ROOT_DIR})") +endif() +option(COMPUTECPP_PACKAGE_ROOT_DIR "Path to the ComputeCpp Package") + +# Obtain the path to compute++ +find_program(COMPUTECPP_DEVICE_COMPILER compute++ PATHS + ${COMPUTECPP_PACKAGE_ROOT_DIR} PATH_SUFFIXES bin) +if (EXISTS ${COMPUTECPP_DEVICE_COMPILER}) + mark_as_advanced(COMPUTECPP_DEVICE_COMPILER) + message(STATUS "compute++ - Found (${COMPUTECPP_PACKAGE_ROOT_DIR})") +else() + message(FATAL_ERROR "compute++ - Not found! (${COMPUTECPP_DEVICE_COMPILER}) (${COMPUTECPP_PACKAGE_ROOT_DIR})") +endif() + +# Obtain the path to computecpp_info +find_program(COMPUTECPP_INFO_TOOL computecpp_info PATHS + ${COMPUTECPP_PACKAGE_ROOT_DIR} PATH_SUFFIXES bin) +if (EXISTS ${COMPUTECPP_INFO_TOOL}) + mark_as_advanced(${COMPUTECPP_INFO_TOOL}) + message(STATUS "computecpp_info - Found (${COMPUTECPP_PACKAGE_ROOT_DIR})") +else() + message(FATAL_ERROR "computecpp_info - Not found! 
(${COMPUTECPP_INFO_TOOL}) (${COMPUTECPP_PACKAGE_ROOT_DIR})") +endif() + +# Obtain the path to the ComputeCpp runtime library +find_library(COMPUTECPP_RUNTIME_LIBRARY ComputeCpp PATHS ${COMPUTECPP_PACKAGE_ROOT_DIR} + HINTS ${COMPUTECPP_PACKAGE_ROOT_DIR}/lib PATH_SUFFIXES lib + DOC "ComputeCpp Runtime Library" NO_DEFAULT_PATH) + +if (EXISTS ${COMPUTECPP_RUNTIME_LIBRARY}) + mark_as_advanced(COMPUTECPP_RUNTIME_LIBRARY) + message(STATUS "libComputeCpp.so - Found") +else() + message(FATAL_ERROR "libComputeCpp.so - Not found! (${COMPUTECPP_PACKAGE_ROOT_DIR})") +endif() + +# Obtain the ComputeCpp include directory +set(COMPUTECPP_INCLUDE_DIRECTORY ${COMPUTECPP_PACKAGE_ROOT_DIR}/include/) +if (NOT EXISTS ${COMPUTECPP_INCLUDE_DIRECTORY}) + message(FATAL_ERROR "ComputeCpp includes - Not found! (${COMPUTECPP_PACKAGE_ROOT_DIR}/include/)") +else() + message(STATUS "ComputeCpp includes - Found (${COMPUTECPP_PACKAGE_ROOT_DIR})") +endif() + +# Obtain the package version +execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-version" + OUTPUT_VARIABLE COMPUTECPP_PACKAGE_VERSION + RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) +if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0") + message(FATAL_ERROR "Package version - Error obtaining version!") +else() + mark_as_advanced(COMPUTECPP_PACKAGE_VERSION) + message(STATUS "Package version - ${COMPUTECPP_PACKAGE_VERSION}") +endif() + +# Obtain the device compiler flags +execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-device-compiler-flags" + OUTPUT_VARIABLE COMPUTECPP_DEVICE_COMPILER_FLAGS + RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) +if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0") + message(FATAL_ERROR "compute++ flags - Error obtaining compute++ flags!") +else() + mark_as_advanced(COMPUTECPP_COMPILER_FLAGS) + message(STATUS "compute++ flags - ${COMPUTECPP_DEVICE_COMPILER_FLAGS}") +endif() + +set(COMPUTECPP_DEVICE_COMPILER_FLAGS ${COMPUTECPP_DEVICE_COMPILER_FLAGS} 
-sycl-compress-name -no-serial-memop -DEIGEN_NO_ASSERTION_CHECKING=1) + +# Check if the platform is supported +execute_process(COMMAND ${COMPUTECPP_INFO_TOOL} "--dump-is-supported" + OUTPUT_VARIABLE COMPUTECPP_PLATFORM_IS_SUPPORTED + RESULT_VARIABLE COMPUTECPP_INFO_TOOL_RESULT OUTPUT_STRIP_TRAILING_WHITESPACE) +if(NOT COMPUTECPP_INFO_TOOL_RESULT EQUAL "0") + message(FATAL_ERROR "platform - Error checking platform support!") +else() + mark_as_advanced(COMPUTECPP_PLATFORM_IS_SUPPORTED) + if (COMPUTECPP_PLATFORM_IS_SUPPORTED) + message(STATUS "platform - your system can support ComputeCpp") + else() + message(STATUS "platform - your system CANNOT support ComputeCpp") + endif() +endif() + +#################### +# __build_spir +#################### +# +# Adds a custom target for running compute++ and adding a dependency for the +# resulting integration header. +# +# targetName : Name of the target. +# sourceFile : Source file to be compiled. +# binaryDir : Intermediate output directory for the integration header. +# +function(__build_spir targetName sourceFile binaryDir) + + # Retrieve source file name. + get_filename_component(sourceFileName ${sourceFile} NAME) + + # Set the path to the Sycl file.
+ set(outputSyclFile ${binaryDir}/${sourceFileName}.sycl) + + # Add any user-defined include to the device compiler + get_property(includeDirectories DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY + INCLUDE_DIRECTORIES) + set(device_compiler_includes "") + foreach(directory ${includeDirectories}) + set(device_compiler_includes "-I${directory}" ${device_compiler_includes}) + endforeach() + if (CMAKE_INCLUDE_PATH) + foreach(directory ${CMAKE_INCLUDE_PATH}) + set(device_compiler_includes "-I${directory}" + ${device_compiler_includes}) + endforeach() + endif() + + # Convert argument list format + separate_arguments(COMPUTECPP_DEVICE_COMPILER_FLAGS) + + # Add custom command for running compute++ + add_custom_command( + OUTPUT ${outputSyclFile} + COMMAND ${COMPUTECPP_DEVICE_COMPILER} + ${COMPUTECPP_DEVICE_COMPILER_FLAGS} + -I${COMPUTECPP_INCLUDE_DIRECTORY} + ${COMPUTECPP_PLATFORM_SPECIFIC_ARGS} + ${device_compiler_includes} + -o ${outputSyclFile} + -c ${CMAKE_CURRENT_SOURCE_DIR}/${sourceFile} + DEPENDS ${sourceFile} + COMMENT "Building ComputeCpp integration header file ${outputSyclFile}") + + # Add a custom target for the generated integration header + add_custom_target(${targetName}_integration_header DEPENDS ${outputSyclFile}) + + # Add a dependency on the integration header + add_dependencies(${targetName} ${targetName}_integration_header) + + # Force inclusion of the integration header for the host compiler + #set(compileFlags -include ${include_file} "-Wall") + target_compile_options(${targetName} PUBLIC ${compileFlags}) + + # Set the host compiler C++ standard to C++11 + set_property(TARGET ${targetName} PROPERTY CXX_STANDARD 11) + + # Disable GCC dual ABI on GCC 5.1 and higher + if(COMPUTECPP_DISABLE_GCC_DUAL_ABI) + set_property(TARGET ${targetName} APPEND PROPERTY COMPILE_DEFINITIONS + "_GLIBCXX_USE_CXX11_ABI=0") + endif() + +endfunction() + +####################### +# add_sycl_to_target +####################### +# +# Adds a SYCL compilation custom command 
associated with an existing +# target and sets a dependency on that new command. +# +# targetName : Name of the target to add a SYCL to. +# sourceFile : Source file to be compiled for SYCL. +# binaryDir : Intermediate output directory for the integration header. +# +function(add_sycl_to_target targetName sourceFile binaryDir) + + # Add custom target to run compute++ and generate the integration header + __build_spir(${targetName} ${sourceFile} ${binaryDir}) + + # Link with the ComputeCpp runtime library + target_link_libraries(${targetName} PUBLIC ${COMPUTECPP_RUNTIME_LIBRARY} + PUBLIC ${OpenCL_LIBRARIES}) + +endfunction(add_sycl_to_target) diff --git a/unsupported/Eigen/CXX11/Tensor b/unsupported/Eigen/CXX11/Tensor index f7b94cee1..da6a3f301 100644 --- a/unsupported/Eigen/CXX11/Tensor +++ b/unsupported/Eigen/CXX11/Tensor @@ -74,6 +74,8 @@ typedef unsigned __int64 uint64_t; #include "src/Tensor/TensorDeviceDefault.h" #include "src/Tensor/TensorDeviceThreadPool.h" #include "src/Tensor/TensorDeviceCuda.h" +#include "src/Tensor/TensorSycl.h" +#include "src/Tensor/TensorDeviceSycl.h" #include "src/Tensor/TensorIndexList.h" #include "src/Tensor/TensorDimensionList.h" #include "src/Tensor/TensorDimensions.h" diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h index cb615c75b..166be200c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorAssign.h @@ -163,6 +163,11 @@ struct TensorEvaluator, Device> TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize); } + /// required by sycl in order to extract the accessor + const TensorEvaluator& left_impl() const { return m_leftImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& right_impl() const { return m_rightImpl; } + EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_leftImpl.data(); } private: diff --git 
a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h index 19d2b50b5..e3880d2e0 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBase.h @@ -811,7 +811,7 @@ class TensorBase protected: template friend class Tensor; - template friend class TensorFixedSize; + template class MakePointer_> friend class TensorFixedSize; template friend class TensorBase; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Derived& derived() const { return *static_cast(this); } @@ -827,7 +827,7 @@ class TensorBase : public TensorBase { static const int NumDimensions = DerivedTraits::NumDimensions; template friend class Tensor; - template friend class TensorFixedSize; + template class MakePointer_> friend class TensorFixedSize; template friend class TensorBase; EIGEN_DEVICE_FUNC diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h index 5d67f69f3..4cfe300eb 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorBroadcasting.h @@ -113,7 +113,7 @@ struct TensorEvaluator, Device> }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device) + : m_broadcast(op.broadcast()),m_impl(op.expression(), device) { // The broadcasting op doesn't change the rank of the tensor. One can't broadcast a scalar // and store the result in a scalar. 
Instead one should reshape the scalar into a a N-D @@ -374,7 +374,12 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC Scalar* data() const { return NULL; } + const TensorEvaluator& impl() const { return m_impl; } + + Broadcast functor() const { return m_broadcast; } + protected: + const Broadcast m_broadcast; Dimensions m_dimensions; array m_outputStrides; array m_inputStrides; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h new file mode 100644 index 000000000..bfd36f5aa --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h @@ -0,0 +1,122 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Cummins Chris PhD student at The University of Edinburgh. +// Contact: + +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#if defined(EIGEN_USE_SYCL) && !defined(EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H) +#define EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H + +namespace Eigen { +/// \struct BufferT is used to specialise add_sycl_buffer function for +// two types of buffer we have. When the MapAllocator is true, we create the +// sycl buffer with MapAllocator. +/// We have to const_cast the input pointer in order to work around the fact +/// that sycl does not accept map allocator for const pointer. 
+template +struct BufferT { + using Type = cl::sycl::buffer>; + static inline void add_sycl_buffer( + const T *ptr, size_t num_bytes, + std::map> &buffer_map) { + buffer_map.insert(std::pair>( + ptr, std::shared_ptr(std::make_shared( + Type(const_cast(ptr), cl::sycl::range<1>(num_bytes)))))); + } +}; + +/// specialisation of the \ref BufferT when the MapAllocator is false. In this +/// case we only create the device-only buffer. +template +struct BufferT { + using Type = cl::sycl::buffer; + static inline void add_sycl_buffer( + const T *ptr, size_t num_bytes, + std::map> &buffer_map) { + buffer_map.insert(std::pair>( + ptr, std::shared_ptr( + std::make_shared(Type(cl::sycl::range<1>(num_bytes)))))); + } +}; + +struct SyclDevice { + /// class members + /// sycl queue + cl::sycl::queue &m_queue; + /// std::map is the container used to make sure that we create only one buffer + /// per pointer. The lifespan of the buffer + /// now depends on the lifespan of SyclDevice. If a non-read-only pointer is + /// needed to be accessed on the host we should manually deallocate it. + mutable std::map> buffer_map; + + SyclDevice(cl::sycl::queue &q) : m_queue(q) {} + // destructor + ~SyclDevice() { deallocate_all(); } + + template + void deallocate(const T *p) const { + auto it = buffer_map.find(p); + if (it != buffer_map.end()) { + buffer_map.erase(it); + } + } + void deallocate_all() const { buffer_map.clear(); } + + /// creation of sycl accessor for a buffer. This function first tries to find + /// the buffer in the buffer_map. + /// If found it gets the accessor from it, if not, the function then adds an + /// entry by creating a sycl buffer + /// for that particular pointer. 
+ template + inline cl::sycl::accessor + get_sycl_accessor(size_t num_bytes, cl::sycl::handler &cgh, + const T *ptr) const { + auto it = buffer_map.find(ptr); + if (it == buffer_map.end()) { + BufferT::add_sycl_buffer(ptr, num_bytes, buffer_map); + } + return ( + ((typename BufferT::Type *)(buffer_map.at(ptr).get())) + ->template get_access(cgh)); + } + + /// allocating memory on the cpu + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void *allocate(size_t num_bytes) const { + return internal::aligned_malloc(num_bytes); + } + + // some runtime conditions that can be applied here + bool isDeviceSuitable() const { return true; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void deallocate(void *buffer) const { + internal::aligned_free(buffer); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpy(void *dst, const void *src, + size_t n) const { + ::memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyHostToDevice( + void *dst, const void *src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memcpyDeviceToHost( + void *dst, const void *src, size_t n) const { + memcpy(dst, src, n); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void memset(void *buffer, int c, + size_t n) const { + ::memset(buffer, c, n); + } +}; +} // end namespace Eigen + +#endif // EIGEN_CXX11_TENSOR_TENSOR_DEVICE_SYCL_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h index a08dfa7c3..3dab6da99 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -20,8 +20,8 @@ namespace Eigen { * */ namespace internal { -template -struct traits > +template class MakePointer_> +struct traits > { // Type promotion to handle the case where the types of the lhs and the rhs are different. 
typedef typename XprType::Scalar Scalar; @@ -36,16 +36,20 @@ struct traits > enum { Flags = 0 }; + template + struct MakePointer { + typedef typename MakePointer_::Type Type; + }; }; -template -struct eval, Eigen::Dense> +template class MakePointer_> +struct eval, Eigen::Dense> { typedef const TensorEvalToOp& type; }; -template -struct nested, 1, typename eval >::type> +template class MakePointer_> +struct nested, 1, typename eval >::type> { typedef TensorEvalToOp type; }; @@ -55,37 +59,38 @@ struct nested, 1, typename eval -template -class TensorEvalToOp : public TensorBase, ReadOnlyAccessors> +template class MakePointer_> +class TensorEvalToOp : public TensorBase, ReadOnlyAccessors> { public: typedef typename Eigen::internal::traits::Scalar Scalar; typedef typename Eigen::NumTraits::Real RealScalar; typedef typename internal::remove_const::type CoeffReturnType; + typedef typename MakePointer_::Type PointerType; typedef typename Eigen::internal::nested::type Nested; typedef typename Eigen::internal::traits::StorageKind StorageKind; typedef typename Eigen::internal::traits::Index Index; - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(CoeffReturnType* buffer, const XprType& expr) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvalToOp(PointerType buffer, const XprType& expr) : m_xpr(expr), m_buffer(buffer) {} EIGEN_DEVICE_FUNC const typename internal::remove_all::type& expression() const { return m_xpr; } - EIGEN_DEVICE_FUNC CoeffReturnType* buffer() const { return m_buffer; } + EIGEN_DEVICE_FUNC PointerType buffer() const { return m_buffer; } protected: typename XprType::Nested m_xpr; - CoeffReturnType* m_buffer; + PointerType m_buffer; }; -template -struct TensorEvaluator, Device> +template class MakePointer_> +struct TensorEvaluator, Device> { - typedef TensorEvalToOp XprType; + typedef TensorEvalToOp XprType; typedef typename ArgType::Scalar Scalar; typedef typename TensorEvaluator::Dimensions Dimensions; typedef typename XprType::Index Index; @@ -102,15 
+107,22 @@ struct TensorEvaluator, Device> }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) - : m_impl(op.expression(), device), m_device(device), m_buffer(op.buffer()) + : m_impl(op.expression(), device), m_device(device), + m_buffer(op.buffer()), m_op(op), m_expression(op.expression()) { } + // Used for accessor extraction in SYCL Managed TensorMap: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const XprType& op() const { + return m_op; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() { } + typedef typename internal::traits>::template MakePointer::Type DevicePointer; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } - EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* scalar) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(DevicePointer scalar) { EIGEN_UNUSED_VARIABLE(scalar); eigen_assert(scalar == NULL); return m_impl.evalSubExprsIfNeeded(m_buffer); @@ -145,12 +157,20 @@ struct TensorEvaluator, Device> TensorOpCost(0, sizeof(CoeffReturnType), 0, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return m_buffer; } + EIGEN_DEVICE_FUNC DevicePointer data() const { return m_buffer; } + ArgType expression() const { return m_expression; } + + /// required by sycl in order to extract the accessor + const TensorEvaluator& impl() const { return m_impl; } + /// added for sycl in order to construct the buffer from the sycl device + const Device& device() const{return m_device;} private: TensorEvaluator m_impl; const Device& m_device; - CoeffReturnType* m_buffer; + DevicePointer m_buffer; + const XprType& m_op; + const ArgType m_expression; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h index c2a327bf0..b2b4bcf62 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h +++ 
b/unsupported/Eigen/CXX11/src/Tensor/TensorEvaluator.h @@ -46,9 +46,11 @@ struct TensorEvaluator }; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) - : m_data(const_cast(m.data())), m_dims(m.dimensions()), m_device(device) + : m_data(const_cast::template MakePointer::Type>(m.data())), m_dims(m.dimensions()), m_device(device), m_impl(m) { } + // Used for accessor extraction in SYCL Managed TensorMap: + const Derived& derived() const { return m_impl; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* dest) { @@ -106,12 +108,16 @@ struct TensorEvaluator internal::unpacket_traits::size); } - EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; } + EIGEN_DEVICE_FUNC typename internal::traits::template MakePointer::Type data() const { return m_data; } + + /// required by sycl in order to construct sycl buffer from raw pointer + const Device& device() const{return m_device;} protected: - Scalar* m_data; + typename internal::traits::template MakePointer::Type m_data; Dimensions m_dims; const Device& m_device; + const Derived& m_impl; }; namespace { @@ -159,8 +165,11 @@ struct TensorEvaluator RawAccess = true }; + // Used for accessor extraction in SYCL Managed TensorMap: + const Derived& derived() const { return m_impl; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const Derived& m, const Device& device) - : m_data(m.data()), m_dims(m.dimensions()), m_device(device) + : m_data(m.data()), m_dims(m.dimensions()), m_device(device), m_impl(m) { } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dims; } @@ -198,12 +207,16 @@ struct TensorEvaluator internal::unpacket_traits::size); } - EIGEN_DEVICE_FUNC const Scalar* data() const { return m_data; } + EIGEN_DEVICE_FUNC typename internal::traits::template MakePointer::Type data() const { return m_data; } + + 
/// added for sycl in order to construct the buffer from the sycl device + const Device& device() const{return m_device;} protected: - const Scalar* m_data; + typename internal::traits::template MakePointer::Type m_data; Dimensions m_dims; const Device& m_device; + const Derived& m_impl; }; @@ -260,6 +273,12 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& impl() const { return m_argImpl; } + /// required by sycl in order to extract the accessor + NullaryOp functor() const { return m_functor; } + + private: const NullaryOp m_functor; TensorEvaluator m_argImpl; @@ -323,6 +342,12 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + /// required by sycl in order to extract the accessor + const TensorEvaluator & impl() const { return m_argImpl; } + /// added for sycl in order to construct the buffer from sycl device + UnaryOp functor() const { return m_functor; } + + private: const UnaryOp m_functor; TensorEvaluator m_argImpl; @@ -396,6 +421,12 @@ struct TensorEvaluator& left_impl() const { return m_leftImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& right_impl() const { return m_rightImpl; } + /// required by sycl in order to extract the accessor + BinaryOp functor() const { return m_functor; } private: const BinaryOp m_functor; @@ -491,10 +522,17 @@ struct TensorEvaluator & arg1Impl() const { return m_arg1Impl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& arg2Impl() const { return m_arg2Impl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& arg3Impl() const { return m_arg3Impl; } + private: const TernaryOp m_functor; TensorEvaluator m_arg1Impl; - TensorEvaluator m_arg2Impl; + TensorEvaluator m_arg2Impl; TensorEvaluator m_arg3Impl; }; @@ -575,6 +613,12 @@ struct TensorEvaluator } 
EIGEN_DEVICE_FUNC CoeffReturnType* data() const { return NULL; } + /// required by sycl in order to extract the accessor + const TensorEvaluator & cond_impl() const { return m_condImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& then_impl() const { return m_thenImpl; } + /// required by sycl in order to extract the accessor + const TensorEvaluator& else_impl() const { return m_elseImpl; } private: TensorEvaluator m_condImpl; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h index a116bf17f..9b99af641 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorExecutor.h @@ -272,6 +272,20 @@ inline void TensorExecutor::run( #endif // __CUDACC__ #endif // EIGEN_USE_GPU +// SYCL Executor policy +#ifdef EIGEN_USE_SYCL + +template +class TensorExecutor { +public: + static inline void run(const Expression &expr, const SyclDevice &device) { + // call TensorSYCL module + TensorSycl::run(expr, device); + } +}; + +#endif + } // end namespace internal } // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h index fcee5f60d..415e459b9 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorFixedSize.h @@ -23,8 +23,8 @@ namespace Eigen { * Eigen::TensorFixedSize> t; */ -template -class TensorFixedSize : public TensorBase > +template class MakePointer_> +class TensorFixedSize : public TensorBase > { public: typedef TensorFixedSize Self; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index c23ecdbc4..9cf4a07e5 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -19,9 +19,15 @@ namespace Eigen { * * */ +/// template class 
MakePointer_ is added to convert the host pointer to the device pointer. +/// It is added due to the fact that for our device compiler T* is not allowed. +/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer T. +/// This is done through our MakePointer_ class. By default the Type in the MakePointer_ is T* . +/// Therefore, by adding the default value, we managed to convert the type and it does not break any +/// existing code as its default value is T*. namespace internal { -template -struct traits > +template class MakePointer_> +struct traits > { // Type promotion to handle the case where the types of the lhs and the rhs are different. typedef typename XprType::Scalar Scalar; @@ -36,26 +42,30 @@ struct traits > enum { Flags = 0 }; + template + struct MakePointer { + typedef typename MakePointer_::Type Type; + }; }; -template -struct eval, Eigen::Dense> +template class MakePointer_> +struct eval, Eigen::Dense> { - typedef const TensorForcedEvalOp& type; + typedef const TensorForcedEvalOp& type; }; -template -struct nested, 1, typename eval >::type> +template class MakePointer_> +struct nested, 1, typename eval >::type> { - typedef TensorForcedEvalOp type; + typedef TensorForcedEvalOp type; }; } // end namespace internal -template -class TensorForcedEvalOp : public TensorBase, ReadOnlyAccessors> +template class MakePointer_> +class TensorForcedEvalOp : public TensorBase, ReadOnlyAccessors> { public: typedef typename Eigen::internal::traits::Scalar Scalar; @@ -77,10 +87,10 @@ class TensorForcedEvalOp : public TensorBase, ReadOn }; -template -struct TensorEvaluator, Device> +template class MakePointer_> +struct TensorEvaluator, Device> { - typedef TensorForcedEvalOp XprType; + typedef TensorForcedEvalOp XprType; typedef typename ArgType::Scalar Scalar; typedef typename TensorEvaluator::Dimensions Dimensions; typedef typename XprType::Index Index; @@ -96,6 +106,7 @@ struct TensorEvaluator, Device> }; EIGEN_DEVICE_FUNC 
TensorEvaluator(const XprType& op, const Device& device) + /// m_op is used for sycl : m_impl(op.expression(), device), m_op(op.expression()), m_device(device), m_buffer(NULL) { } @@ -110,10 +121,10 @@ struct TensorEvaluator, Device> new(m_buffer+i) CoeffReturnType(); } } - typedef TensorEvalToOp EvalTo; + typedef TensorEvalToOp< const typename internal::remove_const::type > EvalTo; EvalTo evalToTmp(m_buffer, m_op); const bool PacketAccess = internal::IsVectorizable::value; - internal::TensorExecutor::run(evalToTmp, m_device); + internal::TensorExecutor::type, PacketAccess>::run(evalToTmp, m_device); return true; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() { @@ -136,13 +147,17 @@ struct TensorEvaluator, Device> return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize); } - EIGEN_DEVICE_FUNC Scalar* data() const { return m_buffer; } + EIGEN_DEVICE_FUNC typename MakePointer::Type data() const { return m_buffer; } + /// required by sycl in order to extract the sycl accessor + const TensorEvaluator& impl() { return m_impl; } + /// used by sycl in order to build the sycl buffer + const Device& device() const{return m_device;} private: TensorEvaluator m_impl; const ArgType m_op; const Device& m_device; - CoeffReturnType* m_buffer; + typename MakePointer::Type m_buffer; }; diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h index 490ddd8bd..83c690133 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForwardDeclarations.h @@ -12,9 +12,19 @@ namespace Eigen { +// MakePointer class is used as a container of the address space of the pointer +// on the host and on the device. From the host side it generates the T* pointer +// and when EIGEN_USE_SYCL is used it constructs a buffer with a map_allocator to +// T* m_data on the host. It is always called on the device. 
+// Specialisation of MakePointer class for creating the sycl buffer with +// map_allocator. +template struct MakePointer{ + typedef T* Type; +}; + +template class MakePointer_ = MakePointer> class TensorMap; template class Tensor; -template class TensorFixedSize; -template class TensorMap; +template class MakePointer_ = MakePointer> class TensorFixedSize; template class TensorRef; template class TensorBase; @@ -52,8 +62,8 @@ template class TensorScanOp; template class TensorCustomUnaryOp; template class TensorCustomBinaryOp; -template class TensorEvalToOp; -template class TensorForcedEvalOp; +template class MakePointer_ = MakePointer> class TensorEvalToOp; +template class MakePointer_ = MakePointer> class TensorForcedEvalOp; template class TensorDevice; template struct TensorEvaluator; @@ -61,6 +71,7 @@ template struct TensorEvaluator; struct DefaultDevice; struct ThreadPoolDevice; struct GpuDevice; +struct SyclDevice; enum FFTResultType { RealPart = 0, diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h index 6fb4f4a31..298a49138 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h @@ -18,11 +18,16 @@ namespace Eigen { * \brief A tensor expression mapping an existing array of data. * */ - -template class TensorMap : public TensorBase > +/// template class MakePointer_ is added to convert the host pointer to the device pointer. +/// It is added due to the fact that for our device compiler T* is not allowed. +/// If we wanted to use the same Evaluator functions we have to convert that type to our pointer T. +/// This is done through our MakePointer_ class. By default the Type in the MakePointer_ is T* . +/// Therefore, by adding the default value, we managed to convert the type and it does not break any +/// existing code as its default value is T*. 
+template class MakePointer_> class TensorMap : public TensorBase > { public: - typedef TensorMap Self; + typedef TensorMap Self; typedef typename PlainObjectType::Base Base; typedef typename Eigen::internal::nested::type Nested; typedef typename internal::traits::StorageKind StorageKind; @@ -36,7 +41,7 @@ template class TensorMap : public Tensor Scalar *, const Scalar *>::type PointerType;*/ - typedef Scalar* PointerType; + typedef typename MakePointer_::Type PointerType; typedef PointerType PointerArgType; static const int Options = Options_; @@ -109,9 +114,9 @@ template class TensorMap : public Tensor EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_dimensions.TotalSize(); } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE Scalar* data() { return m_data; } + EIGEN_STRONG_INLINE PointerType data() { return m_data; } EIGEN_DEVICE_FUNC - EIGEN_STRONG_INLINE const Scalar* data() const { return m_data; } + EIGEN_STRONG_INLINE const PointerType data() const { return m_data; } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& operator()(const array& indices) const @@ -307,8 +312,9 @@ template class TensorMap : public Tensor } private: - Scalar* m_data; + typename MakePointer_::Type m_data; Dimensions m_dimensions; + size_t is_coverted= size_t(0); }; } // end namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h new file mode 100644 index 000000000..277dd739c --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h @@ -0,0 +1,62 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: eigen@codeplay.com +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +// General include header of SYCL target for Tensor Module +#ifndef TENSORSYCL_H +#define TENSORSYCL_H + +#ifdef EIGEN_USE_SYCL + +// trait class to extract different attribute contents +template +struct Trait; +// global pointer to set different attribute state for a class +template +struct MakeGlobalPointer { + typedef typename cl::sycl::global_ptr::pointer_t Type; +}; + +// tuple construction +#include "TensorSyclTuple.h" + +// This file contains the PlaceHolder that replaces the actual data +#include "TensorSyclPlaceHolder.h" + +#include "TensorSyclLeafCount.h" + +// The index PlaceHolder takes the actual expression and replaces the actual +// data on it with the place holder. It uses the same pre-order expression tree +// traverse as the leaf count in order to give the right access number to each +// node in the expression +#include "TensorSyclPlaceHolderExpr.h" + +// creation of an accessor tuple from a tuple of SYCL buffers +#include "TensorSyclExtractAccessor.h" + +// actual data extraction using accessors +//#include "GetDeviceData.h" + +// this is used to change the address space type in tensor map for GPU +#include "TensorSyclConvertToDeviceExpression.h" + +// this is used to extract the functors +#include "TensorSyclExtractFunctors.h" + +// this is used to create tensormap on the device +// this is used to construct the expression on the device +#include "TensorSyclExprConstructor.h" + +// kernel execution using fusion +#include "TensorSyclRun.h" + +#endif // end of EIGEN_USE_SYCL +#endif // TENSORSYCL_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h new file mode 100644 index 000000000..b3748131b --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h @@ -0,0 +1,238 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. 
+// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclConvertToDeviceExpression.h + * + * \brief: + * Conversion from host pointer to device pointer + * inside leaf nodes of the expression. + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { +/// \struct ConvertToDeviceExpression +/// \brief This struct is used to convert the MakePointer in the host expression +/// to the MakeGlobalPointer for the device expression. For the leafNodes +/// containing the pointer. This is due to the fact that the address space of +/// the pointer T* is different on the host and the device. 
+template +struct ConvertToDeviceExpression; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is TensorMap +template class MakePointer_> +struct ConvertToDeviceExpression< + TensorMap, Options2_, + MakePointer_>> { + using Type = TensorMap, + Options2_, MakeGlobalPointer>; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorMap +template class MakePointer_> +struct ConvertToDeviceExpression< + const TensorMap, + Options2_, MakePointer_>> { + using Type = + const TensorMap, + Options2_, MakeGlobalPointer>; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorCwiseNullaryOp +template +struct ConvertToDeviceExpression> { + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = const TensorCwiseNullaryOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is TensorCwiseNullaryOp +template +struct ConvertToDeviceExpression> { + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = TensorCwiseNullaryOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorBroadcastingOp +template +struct ConvertToDeviceExpression> { + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = const TensorBroadcastingOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is TensorBroadcastingOp +template +struct ConvertToDeviceExpression> { + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = TensorBroadcastingOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorCwiseUnaryOp +template +struct ConvertToDeviceExpression> { + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = const 
TensorCwiseUnaryOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is TensorCwiseUnaryOp +template +struct ConvertToDeviceExpression> { + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = TensorCwiseUnaryOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorCwiseBinaryOp +template +struct ConvertToDeviceExpression< + const TensorCwiseBinaryOp> { + using LHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = + const TensorCwiseBinaryOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is TensorCwiseBinaryOp +template +struct ConvertToDeviceExpression> { + using LHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = TensorCwiseBinaryOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorCwiseTernaryOp +template +struct ConvertToDeviceExpression< + const TensorCwiseTernaryOp> { + using Arg1PlaceHolderType = + typename ConvertToDeviceExpression::Type; + using Arg2PlaceHolderType = + typename ConvertToDeviceExpression::Type; + using Arg3PlaceHolderType = + typename ConvertToDeviceExpression::Type; + using Type = + const TensorCwiseTernaryOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is TensorCwiseTernaryOp +template +struct ConvertToDeviceExpression< + TensorCwiseTernaryOp> { + using Arg1PlaceHolderType = + typename ConvertToDeviceExpression::Type; + using Arg2PlaceHolderType = + typename ConvertToDeviceExpression::Type; + using Arg3PlaceHolderType = + typename ConvertToDeviceExpression::Type; + using Type = TensorCwiseTernaryOp; +}; + +/// specialisation of the \ref 
ConvertToDeviceExpression struct when the node +/// type is const TensorCwiseSelectOp +template +struct ConvertToDeviceExpression< + const TensorSelectOp> { + using IfPlaceHolderType = typename ConvertToDeviceExpression::Type; + using ThenPlaceHolderType = + typename ConvertToDeviceExpression::Type; + using ElsePlaceHolderType = + typename ConvertToDeviceExpression::Type; + using Type = const TensorSelectOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is TensorCwiseSelectOp +template +struct ConvertToDeviceExpression> { + using IfPlaceHolderType = typename ConvertToDeviceExpression::Type; + using ThenPlaceHolderType = + typename ConvertToDeviceExpression::Type; + using ElsePlaceHolderType = + typename ConvertToDeviceExpression::Type; + using Type = TensorSelectOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const AssingOP +template +struct ConvertToDeviceExpression> { + using LHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = const TensorAssignOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is AssingOP +template +struct ConvertToDeviceExpression> { + using LHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = TensorAssignOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorForcedEvalOp +template +struct ConvertToDeviceExpression> { + using PlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = const TensorForcedEvalOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorForcedEvalOp +template +struct ConvertToDeviceExpression> { + using PlaceHolderType = typename 
ConvertToDeviceExpression::Type; + using Type = TensorForcedEvalOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is const TensorEvalToOp +template +struct ConvertToDeviceExpression> { + using PlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = const TensorEvalToOp; +}; + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is TensorEvalToOp +template +struct ConvertToDeviceExpression> { + using PlaceHolderType = typename ConvertToDeviceExpression::Type; + using Type = TensorEvalToOp; +}; +} // namespace internal +} // namespace TensorSycl +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX1 diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h new file mode 100644 index 000000000..fe3994175 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h @@ -0,0 +1,495 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclExprConstructor.h + * + * \brief: + * This file re-create an expression on the SYCL device in order + * to use the original tensor evaluator. 
+ * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXPR_CONSTRUCTOR_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXPR_CONSTRUCTOR_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { +/// this class is used by EvalToOp in order to create an lhs expression which is +/// a pointer from an accessor on device-only buffer +template +struct EvalToLHSConstructor { + PtrType expr; + EvalToLHSConstructor(const utility::tuple::Tuple &t) + : expr((&(*(utility::tuple::get(t).get_pointer())))) {} +}; + +/// \struct ExprConstructor is used to reconstruct the expression on the device +/// and +/// recreate the expression with MakeGlobalPointer containing the device address +/// space for the TensorMap pointers used in eval function. +/// It receives the original expression type, the functor of the node, the tuple +/// of accessors, and the device expression type to re-instantiate the +/// expression tree for the device +template +struct ExprConstructor; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorMap +template class MakePointer_, size_t N, typename... Params> +struct ExprConstructor< + const TensorMap, + Options2_, MakeGlobalPointer>, + const Eigen::internal::PlaceHolder< + const TensorMap, + Options3_, MakePointer_>, + N>, + Params...> { + using Type = + const TensorMap, + Options2_, MakeGlobalPointer>; + + Type expr; + + template + ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple &t) + : expr(Type((&(*(utility::tuple::get(t).get_pointer()))), + fd.dimensions())) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorMap +template class MakePointer_, size_t N, typename... 
Params> +struct ExprConstructor< + TensorMap, Options2_, + MakeGlobalPointer>, + Eigen::internal::PlaceHolder< + TensorMap, Options3_, + MakePointer_>, + N>, + Params...> { + using Type = TensorMap, + Options2_, MakeGlobalPointer>; + + Type expr; + template + ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple &t) + : expr(Type((&(*(utility::tuple::get(t).get_pointer()))), + fd.dimensions())) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorCwiseNullaryOp +template +struct ExprConstructor, + TensorCwiseNullaryOp, Params...> { + using my_type = ExprConstructor; + my_type rhsExpr; + using Type = TensorCwiseNullaryOp; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorCwiseNullaryOp +template +struct ExprConstructor, + const TensorCwiseNullaryOp, Params...> { + using my_type = const ExprConstructor; + my_type rhsExpr; + using Type = const TensorCwiseNullaryOp; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorBroadcastingOp +template +struct ExprConstructor, + TensorBroadcastingOp, Params...> { + using my_type = ExprConstructor; + my_type rhsExpr; + using Type = TensorBroadcastingOp; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorBroadcastingOp +template +struct ExprConstructor, + const TensorBroadcastingOp, Params...> { + using my_type = const ExprConstructor; + my_type rhsExpr; + 
using Type = const TensorBroadcastingOp; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorCwiseUnaryOp +template +struct ExprConstructor, + TensorCwiseUnaryOp, Params...> { + using my_type = ExprConstructor; + using Type = TensorCwiseUnaryOp; + my_type rhsExpr; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, utility::tuple::Tuple &t) + : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorCwiseUnaryOp +template +struct ExprConstructor, + const TensorCwiseUnaryOp, Params...> { + using my_type = ExprConstructor; + using Type = const TensorCwiseUnaryOp; + my_type rhsExpr; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorCwiseBinaryOp +template +struct ExprConstructor, + TensorCwiseBinaryOp, Params...> { + using my_left_type = ExprConstructor; + using my_right_type = ExprConstructor; + using Type = TensorCwiseBinaryOp; + + my_left_type lhsExpr; + my_right_type rhsExpr; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : lhsExpr(funcD.lhsExpr, t), + rhsExpr(funcD.rhsExpr, t), + expr(lhsExpr.expr, rhsExpr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorCwiseBinaryOp +template +struct ExprConstructor, + const TensorCwiseBinaryOp, + Params...> { + using my_left_type = ExprConstructor; + using my_right_type = ExprConstructor; + using Type = const TensorCwiseBinaryOp; + + my_left_type lhsExpr; + my_right_type rhsExpr; + 
Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : lhsExpr(funcD.lhsExpr, t), + rhsExpr(funcD.rhsExpr, t), + expr(lhsExpr.expr, rhsExpr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorCwiseTernaryOp +template +struct ExprConstructor< + const TensorCwiseTernaryOp, + const TensorCwiseTernaryOp, Params...> { + using my_arg1_type = ExprConstructor; + using my_arg2_type = ExprConstructor; + using my_arg3_type = ExprConstructor; + using Type = const TensorCwiseTernaryOp; + + my_arg1_type arg1Expr; + my_arg2_type arg2Expr; + my_arg3_type arg3Expr; + Type expr; + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : arg1Expr(funcD.arg1Expr, t), + arg2Expr(funcD.arg2Expr, t), + arg3Expr(funcD.arg3Expr, t), + expr(arg1Expr.expr, arg2Expr.expr, arg3Expr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorCwiseTernaryOp +template +struct ExprConstructor< + TensorCwiseTernaryOp, + TensorCwiseTernaryOp, Params...> { + using my_arg1_type = ExprConstructor; + using my_arg2_type = ExprConstructor; + using my_arg3_type = ExprConstructor; + using Type = TensorCwiseTernaryOp; + + my_arg1_type arg1Expr; + my_arg2_type arg2Expr; + my_arg3_type arg3Expr; + Type expr; + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : arg1Expr(funcD.arg1Expr, t), + arg2Expr(funcD.arg2Expr, t), + arg3Expr(funcD.arg3Expr, t), + expr(arg1Expr.expr, arg2Expr.expr, arg3Expr.expr, funcD.func) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorCwiseSelectOp +template +struct ExprConstructor< + const TensorSelectOp, + const TensorSelectOp, Params...> { + using my_if_type = ExprConstructor; + using my_then_type = ExprConstructor; + using my_else_type = ExprConstructor; + using Type = const TensorSelectOp; + + 
my_if_type ifExpr; + my_then_type thenExpr; + my_else_type elseExpr; + Type expr; + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : ifExpr(funcD.ifExpr, t), + thenExpr(funcD.thenExpr, t), + elseExpr(funcD.elseExpr, t), + expr(ifExpr.expr, thenExpr.expr, elseExpr.expr) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorCwiseSelectOp +template +struct ExprConstructor, + TensorSelectOp, Params...> { + using my_if_type = ExprConstructor; + using my_then_type = ExprConstructor; + using my_else_type = ExprConstructor; + using Type = + TensorSelectOp; + + my_if_type ifExpr; + my_then_type thenExpr; + my_else_type elseExpr; + Type expr; + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : ifExpr(funcD.ifExpr, t), + thenExpr(funcD.thenExpr, t), + elseExpr(funcD.elseExpr, t), + expr(ifExpr.expr, thenExpr.expr, elseExpr.expr) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorAssignOp +template +struct ExprConstructor, + TensorAssignOp, Params...> { + using my_left_type = ExprConstructor; + using my_right_type = ExprConstructor; + using Type = + TensorAssignOp; + + my_left_type lhsExpr; + my_right_type rhsExpr; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : lhsExpr(funcD.lhsExpr, t), + rhsExpr(funcD.rhsExpr, t), + expr(lhsExpr.expr, rhsExpr.expr) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorAssignOp +template +struct ExprConstructor, + const TensorAssignOp, Params...> { + using my_left_type = ExprConstructor; + using my_right_type = ExprConstructor; + using Type = const TensorAssignOp; + + my_left_type lhsExpr; + my_right_type rhsExpr; + Type expr; + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : lhsExpr(funcD.lhsExpr, t), + rhsExpr(funcD.rhsExpr, t), + 
expr(lhsExpr.expr, rhsExpr.expr) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorEvalToOp +template +struct ExprConstructor, + const TensorEvalToOp, Params...> { + using my_expr_type = ExprConstructor; + using my_buffer_type = + typename TensorEvalToOp::PointerType; + using Type = + const TensorEvalToOp; + my_expr_type nestedExpression; + EvalToLHSConstructor buffer; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : nestedExpression(funcD.rhsExpr, t), + buffer(t), + expr(buffer.expr, nestedExpression.expr) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorEvalToOp +template +struct ExprConstructor, + TensorEvalToOp, Params...> { + using my_expr_type = ExprConstructor; + using my_buffer_type = + typename TensorEvalToOp::PointerType; + using Type = TensorEvalToOp; + my_expr_type nestedExpression; + EvalToLHSConstructor buffer; + Type expr; + + template + ExprConstructor(FuncDetector &funcD, + const utility::tuple::Tuple &t) + : nestedExpression(funcD.rhsExpr, t), + buffer(t), + expr(buffer.expr, nestedExpression.expr) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// const TensorForcedEvalOp +template +struct ExprConstructor< + const TensorForcedEvalOp, + const Eigen::internal::PlaceHolder, N>, + Params...> { + using Type = const TensorMap< + Tensor::Scalar, + TensorForcedEvalOp::NumDimensions, 0, + typename TensorForcedEvalOp::Index>, + 0, MakeGlobalPointer>; + + Type expr; + + template + ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple &t) + : expr(Type((&(*(utility::tuple::get(t).get_pointer()))), + fd.dimensions())) {} +}; + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorForcedEvalOp +template +struct ExprConstructor< + const TensorForcedEvalOp, + const Eigen::internal::PlaceHolder, N>, + Params...> { + using Type = 
TensorMap< + Tensor::Scalar, 1, + 0, typename TensorForcedEvalOp::Index>, + 0, MakeGlobalPointer>; + + Type expr; + + template + ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple &t) + : expr(Type((&(*(utility::tuple::get(t).get_pointer()))), + fd.dimensions())) {} +}; + +/// template deduction for \ref ExprConstructor struct +template +auto createDeviceExpression(FuncD &funcD, + const utility::tuple::Tuple &t) + -> decltype(ExprConstructor(funcD, t)) { + return ExprConstructor(funcD, t); +} +} +} +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXPR_CONSTRUCTOR_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h new file mode 100644 index 000000000..cb0ac131d --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h @@ -0,0 +1,466 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclExtractAccessor.h + * + * \brief: + * ExtractAccessor takes Expression placeHolder expression and the tuple of sycl + * buffers as an input. Using pre-order tree traversal, ExtractAccessor + * recursively calls itself for its children in the expression tree. The + * leaf node in the PlaceHolder expression is nothing but a container preserving + * the order of the actual data in the tuple of sycl buffer. By invoking the + * extract accessor for the PlaceHolder, an accessor is created for the Nth + * buffer in the tuple of buffers. 
This accessor is then added as an Nth + * element in the tuple of accessors. In this case we preserve the order of data + * in the expression tree. + * + * This is the specialisation of extract accessor method for different operation + * type in the PlaceHolder expression. + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXTRACT_ACCESSOR_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXTRACT_ACCESSOR_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { +/// \struct ExtractAccessor: Extract Accessor Class is used to extract the +/// accessor from a buffer. +/// Depending on the type of the leaf node we can get a read accessor or a +/// read_write accessor +template +struct ExtractAccessor; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorMap +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + using actual_type = typename Eigen::internal::remove_all< + typename Eigen::internal::traits::Scalar>::type; + static inline auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> + eval) + -> decltype(utility::tuple::make_tuple( + (eval.device() + .template get_sycl_accessor( + eval.dimensions().TotalSize(), cgh, + eval.derived().data())))) { + return utility::tuple::make_tuple( + (eval.device() + .template get_sycl_accessor( + eval.dimensions().TotalSize(), cgh, eval.derived().data()))); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorMap +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + using actual_type = typename Eigen::internal::remove_all< + typename Eigen::internal::traits::Scalar>::type; + + static inline auto getTuple( + cl::sycl::handler& cgh, + TensorEvaluator, Dev> eval) + -> decltype(utility::tuple::make_tuple( + (eval.device() + .template get_sycl_accessor( + eval.dimensions().TotalSize(), cgh, + eval.derived().data())))) { + 
return utility::tuple::make_tuple( + (eval.device() + .template get_sycl_accessor( + eval.dimensions().TotalSize(), cgh, eval.derived().data()))); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorCwiseNullaryOp +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> eval) + -> decltype(ExtractAccessor>::getTuple( + cgh, eval.impl())) { + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.impl()); + return RHSTuple; + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorCwiseNullaryOp +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> eval) + -> decltype(ExtractAccessor>::getTuple( + cgh, eval.impl())) { + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.impl()); + return RHSTuple; + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorBroadcastingOp +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> eval) + -> decltype(ExtractAccessor>::getTuple( + cgh, eval.impl())) { + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.impl()); + return RHSTuple; + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorBroadcastingOp +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> eval) + -> decltype(ExtractAccessor>::getTuple( + cgh, eval.impl())) { + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.impl()); + return RHSTuple; + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TenosorCwiseUnary +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto 
getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> eval) + -> decltype(ExtractAccessor>::getTuple( + cgh, eval.impl())) { + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.impl()); + return RHSTuple; + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TenosorCwiseUnary +template +struct ExtractAccessor, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> eval) + -> decltype(ExtractAccessor>::getTuple( + cgh, eval.impl())) { + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.impl()); + return RHSTuple; + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorCwiseBinaryOp +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple(cl::sycl::handler& cgh, + const TensorEvaluator< + const TensorCwiseBinaryOp, Dev> + eval) + -> decltype(utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.left_impl()), + ExtractAccessor>::getTuple( + cgh, eval.right_impl()))) { + auto LHSTuple = ExtractAccessor>::getTuple( + cgh, eval.left_impl()); + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.right_impl()); + return utility::tuple::append(LHSTuple, RHSTuple); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorCwiseBinaryOp +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> + eval) + -> decltype(utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.left_impl()), + ExtractAccessor>::getTuple( + cgh, eval.right_impl()))) { + auto LHSTuple = ExtractAccessor>::getTuple( + cgh, eval.left_impl()); + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.right_impl()); + return utility::tuple::append(LHSTuple, RHSTuple); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorCwiseTernaryOp 
+template +struct ExtractAccessor, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator< + const TensorCwiseTernaryOp, Dev> + eval) + -> decltype(utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.arg1Impl()), + utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.arg2Impl()), + ExtractAccessor>::getTuple( + cgh, eval.arg3Impl())))) { + auto Arg1Tuple = ExtractAccessor>::getTuple( + cgh, eval.arg1Impl()); + auto Arg2Tuple = ExtractAccessor>::getTuple( + cgh, eval.arg2Impl()); + auto Arg3Tuple = ExtractAccessor>::getTuple( + cgh, eval.arg3Impl()); + return utility::tuple::append(Arg1Tuple, + utility::tuple::append(Arg2Tuple, Arg3Tuple)); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorCwiseTernaryOp +template +struct ExtractAccessor, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator< + TensorCwiseTernaryOp, Dev> + eval) + -> decltype(utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.arg1Impl()), + utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.arg2Impl()), + ExtractAccessor>::getTuple( + cgh, eval.arg3Impl())))) { + auto Arg1Tuple = ExtractAccessor>::getTuple( + cgh, eval.arg1Impl()); + auto Arg2Tuple = ExtractAccessor>::getTuple( + cgh, eval.arg2Impl()); + auto Arg3Tuple = ExtractAccessor>::getTuple( + cgh, eval.arg3Impl()); + return utility::tuple::append(Arg1Tuple, + utility::tuple::append(Arg2Tuple, Arg3Tuple)); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorCwiseSelectOp +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, + Dev> + eval) + -> decltype(utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.cond_impl()), + utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.then_impl()), + ExtractAccessor>::getTuple( + cgh, 
eval.else_impl())))) { + auto IfTuple = ExtractAccessor>::getTuple( + cgh, eval.cond_impl()); + auto ThenTuple = ExtractAccessor>::getTuple( + cgh, eval.then_impl()); + auto ElseTuple = ExtractAccessor>::getTuple( + cgh, eval.else_impl()); + return utility::tuple::append(IfTuple, + utility::tuple::append(ThenTuple, ElseTuple)); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorCwiseSelectOp +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> + eval) + -> decltype(utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.cond_impl()), + utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.then_impl()), + ExtractAccessor>::getTuple( + cgh, eval.else_impl())))) { + auto IfTuple = ExtractAccessor>::getTuple( + cgh, eval.cond_impl()); + auto ThenTuple = ExtractAccessor>::getTuple( + cgh, eval.then_impl()); + auto ElseTuple = ExtractAccessor>::getTuple( + cgh, eval.else_impl()); + return utility::tuple::append(IfTuple, + utility::tuple::append(ThenTuple, ElseTuple)); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorAssignOp +template +struct ExtractAccessor< + TensorEvaluator, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> eval) + -> decltype(utility::tuple::append( + ExtractAccessor>::getTuple( + cgh, eval.left_impl()), + ExtractAccessor>::getTuple( + cgh, eval.right_impl()))) { + auto LHSTuple = ExtractAccessor>::getTuple( + cgh, eval.left_impl()); + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.right_impl()); + return utility::tuple::append(LHSTuple, RHSTuple); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorAssignOp +template +struct ExtractAccessor, Dev>> { + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> eval) + -> 
decltype(utility::tuple::append( + ExtractAccessor>::getTuple( + eval.left_impl()), + ExtractAccessor>::getTuple( + eval.right_impl()))) { + auto LHSTuple = ExtractAccessor>::getTuple( + cgh, eval.left_impl()); + auto RHSTuple = ExtractAccessor>::getTuple( + cgh, eval.right_impl()); + return utility::tuple::append(LHSTuple, RHSTuple); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorForcedEvalOp +template +struct ExtractAccessor, Dev>> { + using actual_type = + typename Eigen::internal::remove_all, Dev>::CoeffReturnType>::type; + static auto getTuple( + cl::sycl::handler& cgh, + const TensorEvaluator, Dev> eval) + -> decltype(utility::tuple::make_tuple( + (eval.device() + .template get_sycl_accessor( + eval.dimensions().TotalSize(), cgh, eval.data())))) { + return utility::tuple::make_tuple( + (eval.device() + .template get_sycl_accessor( + eval.dimensions().TotalSize(), cgh, eval.data()))); + } +}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// TensorForcedEvalOp +template +struct ExtractAccessor, Dev>> + : ExtractAccessor, Dev>> {}; + +/// specialisation of the \ref ExtractAccessor struct when the node type is +/// const TensorEvalToOp +template +struct ExtractAccessor, Dev>> { + using actual_type = + typename Eigen::internal::remove_all, Dev>::CoeffReturnType>::type; + + static auto getTuple(cl::sycl::handler& cgh, + TensorEvaluator, Dev> eval) + -> decltype(utility::tuple::append( + utility::tuple::make_tuple( + (eval.device() + .template get_sycl_accessor( + eval.dimensions().TotalSize(), cgh, eval.data()))), + ExtractAccessor>::getTuple(cgh, + eval.impl()))) { + auto LHSTuple = utility::tuple::make_tuple( + (eval.device() + .template get_sycl_accessor( + eval.dimensions().TotalSize(), cgh, eval.data()))); + + auto RHSTuple = + ExtractAccessor>::getTuple(cgh, eval.impl()); + return utility::tuple::append(LHSTuple, RHSTuple); + } +}; + +/// specialisation of the \ref 
ExtractAccessor struct when the node type is +/// TensorEvalToOp +template +struct ExtractAccessor, Dev>> + : ExtractAccessor, Dev>> {}; + +/// template deduction for \ref ExtractAccessor +template +auto createTupleOfAccessors(cl::sycl::handler& cgh, const Evaluator& expr) + -> decltype(ExtractAccessor::getTuple(cgh, expr)) { + return ExtractAccessor::getTuple(cgh, expr); +} +} +} +} +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXTRACT_ACCESSOR_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h new file mode 100644 index 000000000..f69c5afcb --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractFunctors.h @@ -0,0 +1,313 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclextractFunctors.h + * + * \brief: + * Used to extract all the functors allocated to each node of the expression +*tree. + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXTRACT_FUNCTORS_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXTRACT_FUNCTORS_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { +/// \struct FunctorExtractor: This struct is used to extract the functors +/// constructed on +/// the host-side, to pack them and reuse them in reconstruction of the +/// expression on the device. +/// We have to do that as in Eigen the functors are not stateless so we cannot +/// re-instantiate them on the device. 
+/// We have to pass whatever instantiated to the device. +template +struct FunctorExtractor; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorMap: +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + using Dimensions = typename PlainObjectType::Dimensions; + const Dimensions m_dimensions; + const Dimensions& dimensions() const { return m_dimensions; } + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : m_dimensions(expr.dimensions()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorMap +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + using Dimensions = typename PlainObjectType::Dimensions; + const Dimensions m_dimensions; + const Dimensions& dimensions() const { return m_dimensions; } + FunctorExtractor( + const TensorEvaluator, Dev>& + expr) + : m_dimensions(expr.dimensions()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorForcedEvalOp +template +struct FunctorExtractor, Dev>> { + using Dimensions = typename Expr::Dimensions; + const Dimensions m_dimensions; + const Dimensions& dimensions() const { return m_dimensions; } + FunctorExtractor(const TensorEvaluator, Dev>& expr) + : m_dimensions(expr.dimensions()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorForcedEvalOp +template +struct FunctorExtractor, Dev>> { + using Dimensions = + typename TensorEvaluator, Dev>::Dimensions; + const Dimensions m_dimensions; + const Dimensions& dimensions() const { return m_dimensions; } + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : m_dimensions(expr.dimensions()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorCwiseNullaryOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> rhsExpr; + OP func; + FunctorExtractor( + TensorEvaluator, Dev>& expr) 
+ : rhsExpr(expr.impl()), func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorCwiseNullaryOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> rhsExpr; + OP func; + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : rhsExpr(expr.impl()), func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorBroadcastingOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> rhsExpr; + OP func; + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : rhsExpr(expr.impl()), func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorBroadcastingOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> rhsExpr; + OP func; + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : rhsExpr(expr.impl()), func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorCwiseUnaryOp +template +struct FunctorExtractor, Dev>> { + FunctorExtractor> rhsExpr; + OP func; + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : rhsExpr(expr.impl()), func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorCwiseUnaryOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> rhsExpr; + OP func; + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : rhsExpr(expr.impl()), func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorCwiseBinaryOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> lhsExpr; + FunctorExtractor> rhsExpr; + OP func; + FunctorExtractor( + const TensorEvaluator, Dev>& + expr) + : lhsExpr(expr.left_impl()), + 
rhsExpr(expr.right_impl()), + func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorCwiseBinaryOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> lhsExpr; + FunctorExtractor> rhsExpr; + OP func; + FunctorExtractor(const TensorEvaluator< + const TensorCwiseBinaryOp, Dev>& expr) + : lhsExpr(expr.left_impl()), + rhsExpr(expr.right_impl()), + func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorCwiseTernaryOp +template +struct FunctorExtractor, Dev>> { + FunctorExtractor> arg1Expr; + FunctorExtractor> arg2Expr; + FunctorExtractor> arg3Expr; + OP func; + FunctorExtractor(const TensorEvaluator< + const TensorCwiseTernaryOp, + Dev>& expr) + : arg1Expr(expr.arg1Impl()), + arg2Expr(expr.arg2Impl()), + arg3Expr(expr.arg3Impl()), + func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorCwiseTernaryOp +template +struct FunctorExtractor, Dev>> { + FunctorExtractor> arg1Expr; + FunctorExtractor> arg2Expr; + FunctorExtractor> arg3Expr; + OP func; + FunctorExtractor( + const TensorEvaluator< + TensorCwiseTernaryOp, Dev>& expr) + : arg1Expr(expr.arg1Impl()), + arg2Expr(expr.arg2Impl()), + arg3Expr(expr.arg3Impl()), + func(expr.functor()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorCwiseSelectOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> ifExpr; + FunctorExtractor> thenExpr; + FunctorExtractor> elseExpr; + FunctorExtractor(const TensorEvaluator< + const TensorSelectOp, Dev>& expr) + : ifExpr(expr.cond_impl()), + thenExpr(expr.then_impl()), + elseExpr(expr.else_impl()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorCwiseSelectOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + 
FunctorExtractor ifExpr; + FunctorExtractor thenExpr; + FunctorExtractor elseExpr; + FunctorExtractor( + const TensorEvaluator, Dev>& + expr) + : ifExpr(expr.cond_impl()), + thenExpr(expr.then_impl()), + elseExpr(expr.else_impl()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorAssignOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> lhsExpr; + FunctorExtractor> rhsExpr; + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : lhsExpr(expr.left_impl()), rhsExpr(expr.right_impl()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorAssignOp +template +struct FunctorExtractor< + TensorEvaluator, Dev>> { + FunctorExtractor> lhsExpr; + FunctorExtractor> rhsExpr; + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : lhsExpr(expr.left_impl()), rhsExpr(expr.right_impl()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// TensorEvalToOp +template +struct FunctorExtractor, Dev>> { + FunctorExtractor> rhsExpr; + FunctorExtractor(const TensorEvaluator, Dev>& expr) + : rhsExpr(expr.impl()) {} +}; + +/// specialisation of the \ref FunctorExtractor struct when the node type is +/// const TensorEvalToOp +template +struct FunctorExtractor, Dev>> { + FunctorExtractor> rhsExpr; + FunctorExtractor( + const TensorEvaluator, Dev>& expr) + : rhsExpr(expr.impl()) {} +}; + +/// template deduction function for FunctorExtractor +template +auto extractFunctors(const Evaluator& evaluator) + -> FunctorExtractor { + return FunctorExtractor(evaluator); +} +} // namespace internal +} // namespace TensorSycl +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXTRACT_FUNCTORS_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h new file mode 100644 index 000000000..77e0e15e1 --- /dev/null +++ 
b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h @@ -0,0 +1,188 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclLeafCount.h + * + * \brief: + * The leaf count used the pre-order expression tree traverse in order to name + * count the number of leaf nodes in the expression + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_LEAF_COUNT_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_LEAF_COUNT_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { +/// \brief LeafCount used to counting terminal nodes. The total number of +/// leaf nodes is used by MakePlaceHolderExprHelper to find the order +/// of the leaf node in a expression tree at compile time. 
+template +struct LeafCount; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorMap +template class MakePointer_> +struct LeafCount> { + static const size_t Count = 1; +}; + +/// specialisation of the \ref LeafCount struct when the node type is TensorMap +template class MakePointer_> +struct LeafCount> { + static const size_t Count = 1; +}; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorCwiseNullaryOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorCwiseNullaryOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorBroadcastingOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorCwiseNullaryOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count; +}; + +// TensorCwiseUnaryOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count; +}; + +// TensorCwiseUnaryOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorCwiseBinaryOp +template +struct LeafCount> { + static const size_t Count = + LeafCount::Count + LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorCwiseBinaryOp +template +struct LeafCount> { + static const size_t Count = + LeafCount::Count + LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorCwiseTernaryOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count + + LeafCount::Count + + LeafCount::Count; +}; + +/// specialisation of 
the \ref LeafCount struct when the node type is const +/// TensorCwiseTernaryOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count + + LeafCount::Count + + LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorCwiseSelectOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count + + LeafCount::Count + + LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorCwiseSelectOp +template +struct LeafCount> { + static const size_t Count = LeafCount::Count + + LeafCount::Count + + LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorAssignOp +template +struct LeafCount> { + static const size_t Count = + LeafCount::Count + LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorAssignOp +template +struct LeafCount> { + static const size_t Count = + LeafCount::Count + LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorForcedEvalOp +template +struct LeafCount> { + static const size_t Count = 1; +}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorForcedEvalOp +template +struct LeafCount> { + static const size_t Count = 1; +}; + +/// specialisation of the \ref LeafCount struct when the node type is const +/// TensorEvalToOp +template +struct LeafCount> { + static const size_t Count = 1 + LeafCount::Count; +}; + +/// specialisation of the \ref LeafCount struct when the node type is +/// TensorEvalToOp +template +struct LeafCount> { + static const size_t Count = 1 + LeafCount::Count; +}; +} +} +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_LEAF_COUNT_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h new file 
mode 100644 index 000000000..87995a25e --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h @@ -0,0 +1,151 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclPlaceHolder.h + * + * \brief: + * The PlaceHolder expression are nothing but a container preserving + * the order of actual data in the tuple of sycl buffer. + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_PLACEHOLDER_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_PLACEHOLDER_HPP + +namespace Eigen { +namespace internal { +/// \struct PlaceHolder +/// \brief PlaceHolder is used to replace the \ref TensorMap in the expression +/// tree. +/// PlaceHolder contains the order of the leaf node in the expression tree. +template +struct PlaceHolder { + static constexpr size_t I = N; + using Type = Scalar; +}; + +template class MakePointer_, size_t N> +struct PlaceHolder, + N> { + static constexpr size_t I = N; + + using Type = const TensorMap; + + typedef typename Type::Self Self; + typedef typename Type::Base Base; + typedef typename Type::Nested Nested; + typedef typename Type::StorageKind StorageKind; + typedef typename Type::Index Index; + typedef typename Type::Scalar Scalar; + typedef typename Type::RealScalar RealScalar; + typedef typename Type::CoeffReturnType CoeffReturnType; +}; + +/// \brief specialisation of the PlaceHolder node for TensorForcedEvalOp. The +/// TensorForcedEvalOp act as a leaf node for its parent node. 
+template +struct PlaceHolder, N> { + static constexpr size_t I = N; + + using Type = const TensorForcedEvalOp; + + typedef typename Type::Nested Nested; + typedef typename Type::StorageKind StorageKind; + typedef typename Type::Index Index; + + typedef typename Type::Scalar Scalar; + typedef typename Type::Packet Packet; + + typedef typename Type::RealScalar RealScalar; + typedef typename Type::CoeffReturnType CoeffReturnType; + typedef typename Type::PacketReturnType PacketReturnType; +}; + +template +struct PlaceHolder, N> { + static constexpr size_t I = N; + + using Type = TensorForcedEvalOp; + + typedef typename Type::Nested Nested; + typedef typename Type::StorageKind StorageKind; + typedef typename Type::Index Index; + + typedef typename Type::Scalar Scalar; + typedef typename Type::Packet Packet; + + typedef typename Type::RealScalar RealScalar; + typedef typename Type::CoeffReturnType CoeffReturnType; + typedef typename Type::PacketReturnType PacketReturnType; +}; + +/// \brief specialisation of the PlaceHolder node for const TensorMap +template class Makepointer_, size_t N> +struct PlaceHolder, N> { + static constexpr size_t I = N; + + using Type = TensorMap; + + typedef typename Type::Self Self; + typedef typename Type::Base Base; + typedef typename Type::Nested Nested; + typedef typename Type::StorageKind StorageKind; + typedef typename Type::Index Index; + typedef typename Type::Scalar Scalar; + typedef typename Type::Packet Packet; + typedef typename Type::RealScalar RealScalar; + typedef typename Type::CoeffReturnType CoeffReturnType; + typedef typename Base::PacketReturnType PacketReturnType; +}; + +/// specialisation of the traits struct for PlaceHolder +template class Makepointer_, size_t N> +struct traits< + PlaceHolder, N>> + : public traits { + typedef traits BaseTraits; + typedef typename BaseTraits::Scalar Scalar; + typedef typename BaseTraits::StorageKind StorageKind; + typedef typename BaseTraits::Index Index; + static const int 
NumDimensions = BaseTraits::NumDimensions; + static const int Layout = BaseTraits::Layout; + enum { + Options = Options_, + Flags = BaseTraits::Flags, + }; +}; + +template class Makepointer_, size_t N> +struct traits< + PlaceHolder, N>> + : public traits { + typedef traits BaseTraits; + typedef typename BaseTraits::Scalar Scalar; + typedef typename BaseTraits::StorageKind StorageKind; + typedef typename BaseTraits::Index Index; + static const int NumDimensions = BaseTraits::NumDimensions; + static const int Layout = BaseTraits::Layout; + enum { + Options = Options_, + Flags = BaseTraits::Flags, + }; +}; + +} // end namespoace internal +} // end namespoace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_PLACEHOLDER_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h new file mode 100644 index 000000000..dbd7a8544 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h @@ -0,0 +1,293 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +/***************************************************************** + * TensorSyclPlaceHolderExpr.h + * + * \brief: + * This is the specialisation of the placeholder expression based on the + * operation type + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_PLACEHOLDER_EXPR_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_PLACEHOLDER_EXPR_HPP + +namespace Eigen { +namespace TensorSycl { +namespace internal { +/// \sttruct PlaceHolderExpression +/// \brief it is used to create the PlaceHolder expression. The PlaceHolder +/// expression is a copy of expression type in which the TensorMap of the has +/// been replaced with PlaceHolder. +template +struct PlaceHolderExpression; + +/// specialisation of the \ref PlaceHolderExpression when the node is TensorMap +template class MakePointer_, size_t N> +struct PlaceHolderExpression< + Eigen::TensorMap, + Options2_, MakePointer_>, + N> { + using Type = Eigen::internal::PlaceHolder< + Eigen::TensorMap, + Options2_, MakePointer_>, + N>; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorMap +template class MakePointer_, size_t N> +struct PlaceHolderExpression< + const Eigen::TensorMap, + Options2_, MakePointer_>, + N> { + using Type = const Eigen::internal::PlaceHolder< + const TensorMap, + Options2_, MakePointer_>, + N>; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorCwiseNullaryOp +template +struct PlaceHolderExpression, N> { + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + using Type = TensorCwiseNullaryOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorCwiseNullaryOp +template +struct PlaceHolderExpression, N> { + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + using Type = const TensorCwiseNullaryOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is 
+/// TensorBroadcastingOp +template +struct PlaceHolderExpression, N> { + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + using Type = TensorBroadcastingOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorBroadcastingOp +template +struct PlaceHolderExpression, N> { + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + using Type = const TensorBroadcastingOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorCwiseUnaryOp +template +struct PlaceHolderExpression, N> { + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + using Type = TensorCwiseUnaryOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorCwiseUnaryOp +template +struct PlaceHolderExpression, N> { + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + using Type = const TensorCwiseUnaryOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorCwiseBinaryOp +template +struct PlaceHolderExpression, N> { + static const size_t RHSLeafCount = LeafCount::Count; + + using LHSPlaceHolderType = + typename PlaceHolderExpression::Type; + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + + using Type = TensorCwiseBinaryOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorCwiseBinaryOp +template +struct PlaceHolderExpression, + N> { + static const size_t RHSLeafCount = LeafCount::Count; + + using LHSPlaceHolderType = + typename PlaceHolderExpression::Type; + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + + using Type = + const TensorCwiseBinaryOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorCwiseSelectOp +template +struct PlaceHolderExpression< + const TensorCwiseTernaryOp, N> { + static const size_t Arg3LeafCount = LeafCount::Count; + static const size_t 
Arg2LeafCount = LeafCount::Count; + + using Arg1PlaceHolderType = + typename PlaceHolderExpression::Type; + using Arg2PlaceHolderType = + typename PlaceHolderExpression::Type; + + using Arg3PlaceHolderType = typename PlaceHolderExpression::Type; + + using Type = + const TensorCwiseTernaryOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorCwiseSelectOp +template +struct PlaceHolderExpression< + TensorCwiseTernaryOp, N> { + static const size_t Arg3LeafCount = LeafCount::Count; + static const size_t Arg2LeafCount = LeafCount::Count; + + using Arg1PlaceHolderType = + typename PlaceHolderExpression::Type; + using Arg2PlaceHolderType = + typename PlaceHolderExpression::Type; + + using Arg3PlaceHolderType = typename PlaceHolderExpression::Type; + + using Type = TensorCwiseTernaryOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorCwiseSelectOp +template +struct PlaceHolderExpression, + N> { + static const size_t ElseLeafCount = LeafCount::Count; + static const size_t ThenLeafCount = LeafCount::Count; + + using IfPlaceHolderType = + typename PlaceHolderExpression::Type; + using ThenPlaceHolderType = + typename PlaceHolderExpression::Type; + + using ElsePlaceHolderType = typename PlaceHolderExpression::Type; + + using Type = const TensorSelectOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorCwiseSelectOp +template +struct PlaceHolderExpression, N> { + static const size_t ElseLeafCount = LeafCount::Count; + static const size_t ThenLeafCount = LeafCount::Count; + + using IfPlaceHolderType = + typename PlaceHolderExpression::Type; + using ThenPlaceHolderType = + typename PlaceHolderExpression::Type; + + using ElsePlaceHolderType = typename PlaceHolderExpression::Type; + + using Type = TensorSelectOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorAssignOp +template +struct PlaceHolderExpression, N> { + static 
const size_t RHSLeafCount = LeafCount::Count; + + using LHSPlaceHolderType = + typename PlaceHolderExpression::Type; + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + + using Type = TensorAssignOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorAssignOp +template +struct PlaceHolderExpression, N> { + static const size_t RHSLeafCount = LeafCount::Count; + + using LHSPlaceHolderType = + typename PlaceHolderExpression::Type; + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + + using Type = const TensorAssignOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorForcedEvalOp +template +struct PlaceHolderExpression, N> { + using Type = + const Eigen::internal::PlaceHolder, N>; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorForcedEvalOp +template +struct PlaceHolderExpression, N> { + using Type = Eigen::internal::PlaceHolder, N>; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is const +/// TensorEvalToOp +template +struct PlaceHolderExpression, N> { + static const size_t RHSLeafCount = LeafCount::Count; + + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + + using Type = const TensorEvalToOp; +}; + +/// specialisation of the \ref PlaceHolderExpression when the node is +/// TensorEvalToOp +template +struct PlaceHolderExpression, N> { + static const size_t RHSLeafCount = LeafCount::Count; + + using RHSPlaceHolderType = typename PlaceHolderExpression::Type; + + using Type = TensorEvalToOp; +}; + +/// template deduction for \ref PlaceHolderExpression struct +template +struct createPlaceHolderExpression { + static const size_t TotalLeaves = LeafCount::Count; + using Type = typename PlaceHolderExpression::Type; +}; +} +} +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_PLACEHOLDER_EXPR_HPP diff --git 
a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h new file mode 100644 index 000000000..3758d46a0 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h @@ -0,0 +1,84 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Cummins Chris PhD student at The University of Edinburgh. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensorSyclRun.h + * + * \brief: + * Schedule_kernel invoke an specialised version of kernel struct. The + * specialisation is based on the data dimension in sycl buffer + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_SYCLRUN_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_SYCLRUN_HPP + +namespace Eigen { +namespace TensorSycl { +/// The run function in tensor sycl convert the expression tree to a buffer +/// based expression tree; +/// creates the expression tree for the device with accessor to buffers; +/// construct the kernel and submit it to the sycl queue. 
+template +void run(Expr &expr, Dev &dev) { + Eigen::TensorEvaluator evaluator(expr, dev); + const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL); + if (needs_assign) { + using PlaceHolderExpr = + typename internal::createPlaceHolderExpression::Type; + auto functors = internal::extractFunctors(evaluator); + + dev.m_queue.submit([&](cl::sycl::handler &cgh) { + + // create a tuple of accessors from Evaluator + auto tuple_of_accessors = + internal::createTupleOfAccessors(cgh, evaluator); + const auto range = + utility::tuple::get<0>(tuple_of_accessors).get_range()[0]; + + size_t outTileSize = range; + if (range > 64) outTileSize = 64; + size_t yMode = range % outTileSize; + int yRange = static_cast(range); + if (yMode != 0) yRange += (outTileSize - yMode); + + // run the kernel + cgh.parallel_for( + cl::sycl::nd_range<1>(cl::sycl::range<1>(yRange), + cl::sycl::range<1>(outTileSize)), + [=](cl::sycl::nd_item<1> itemID) { + using DevExpr = + typename internal::ConvertToDeviceExpression::Type; + + auto device_expr = + internal::createDeviceExpression( + functors, tuple_of_accessors); + auto device_evaluator = + Eigen::TensorEvaluator( + device_expr.expr, Eigen::DefaultDevice()); + + if (itemID.get_global_linear_id() < range) { + device_evaluator.evalScalar( + static_cast(itemID.get_global_linear_id())); + } + }); + }); + dev.m_queue.throw_asynchronous(); + } + evaluator.cleanup(); +} +} // namespace TensorSycl +} // namespace Eigen + +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_SYCLRUN_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h new file mode 100644 index 000000000..8b9fc52c4 --- /dev/null +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclTuple.h @@ -0,0 +1,264 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. 
+// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/***************************************************************** + * TensroSyclTuple.h + * + * \brief: + * Minimal implementation of std::tuple that can be used inside a SYCL kernel. + * +*****************************************************************/ + +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_TUPLE_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_TUPLE_HPP +namespace utility { +namespace tuple { +/// \struct EnableIf +/// \brief The EnableIf struct is used to statically define type based on the +/// condition. +template +struct EnableIf {}; +/// \brief specialisation of the \ref EnableIf when the condition is true +template +struct EnableIf { + typedef T type; +}; + +/// \struct Tuple +/// \brief is a fixed-size collection of heterogeneous values +/// \ztparam Ts... - the types of the elements that the tuple stores. +/// Empty list is supported. +template +struct Tuple {}; + +/// \brief specialisation of the \ref Tuple class when the tuple has at least +/// one element. +/// \tparam T : the type of the first element in the tuple. +/// \tparam Ts... the rest of the elements in the tuple. Ts... can be empty. +template +struct Tuple { + Tuple(T t, Ts... ts) : head(t), tail(ts...) 
{} + + T head; + Tuple tail; +}; + +/// \struct ElemTypeHolder +/// \brief ElemTypeHolder class is used to specify the types of the +/// elements inside the tuple +/// \tparam size_t the number of elements inside the tuple +/// \tparam class the tuple class +template +struct ElemTypeHolder; + +/// \brief specialisation of the \ref ElemTypeHolder class when the number +/// elements inside the tuple is 1 +template +struct ElemTypeHolder<0, Tuple> { + typedef T type; +}; + +/// \brief specialisation of the \ref ElemTypeHolder class when the number of +/// elements inside the tuple is bigger than 1. It recursively call itself to +/// detect the type of each element in the tuple +/// \tparam T : the type of the first element in the tuple. +/// \tparam Ts... the rest of the elements in the tuple. Ts... can be empty. +/// \tparam K is the Kth element in the tuple +template +struct ElemTypeHolder> { + typedef typename ElemTypeHolder>::type type; +}; + +/// get +/// \brief Extracts the first element from the tuple. +/// K=0 represents the first element of the tuple. The tuple cannot be empty. +/// \tparam Ts... are the elements type in the tuple. +/// \param t is the tuple whose contents to extract +/// \return typename ElemTypeHolder<0, Tuple>::type &>::type +template +typename EnableIf>::type &>::type +get(Tuple &t) { + return t.head; +} +/// get +/// \brief Extracts the Kth element from the tuple. +/// \tparam K is an integer value in [0,sizeof...(Types)). +/// \tparam T is the (sizeof...(Types) -(K+1)) element in the tuple +/// \tparam Ts... are the elements type in the tuple. +/// \param t is the tuple whose contents to extract +/// \return typename ElemTypeHolder>::type &>::type +template +typename EnableIf>::type &>::type +get(Tuple &t) { + return get(t.tail); +} + +/// get +/// \brief Extracts the first element from the tuple when the tuple and all the +/// elements inside are const. +/// K=0 represents the first element of the tuple. The tuple cannot be empty. 
+/// \tparam Ts... are the elements type in the tuple. +/// \param t is the const tuple whose contents to extract +/// \return const typename ElemTypeHolder<0, Tuple>::type &>::type +template +typename EnableIf>::type &>::type +get(const Tuple &t) { + return t.head; +} + +/// get +/// \brief Extracts the Kth element from the tuple when the tuple and all the +/// elements inside are const. +/// \tparam K is an integer value in [0,sizeof...(Types)). +/// \tparam T is the (sizeof...(Types) -(K+1)) element in the tuple +/// \tparam Ts... are the elements type in the tuple. +/// \param t is the const tuple whose contents to extract +/// \return const typename ElemTypeHolder>::type &>::type +template +typename EnableIf< + k != 0, const typename ElemTypeHolder>::type &>::type +get(const Tuple &t) { + return get(t.tail); +} +/// make_tuple +/// \brief Creates a tuple object, deducing the target type from the types of +/// arguments. +/// \tparam Args the type of the arguments to construct the tuple from +/// \param args zero or more arguments to construct the tuple from +/// \return Tuple +template +Tuple make_tuple(Args... args) { + return Tuple(args...); +} + +/// size +/// \brief Provides access to the number of elements in a tuple as a +/// compile-time constant expression. +/// \tparam Args the type of the arguments to construct the tuple from +/// \return size_t +template +static constexpr size_t size(Tuple &) { + return sizeof...(Args); +} + +/// \struct Index_list +/// \brief Creates a list of index from the elements in the tuple +/// \tparam Is... a list of index from [0 to sizeof...(tuple elements)) +template +struct Index_list {}; + +/// \struct RangeBuilder +/// \brief Collects internal details for generating index ranges [MIN, MAX) +/// Declare primary template for index range builder +/// \tparam MIN is the starting index in the tuple +/// \tparam N represents sizeof..(elements)- sizeof...(Is) +/// \tparam Is... 
are the list of generated index so far +template +struct RangeBuilder; + +/// \brief base Step: Specialisation of the \ref RangeBuilder when the +/// MIN==MAX. In this case the Is... is [0 to sizeof...(tuple elements)) +/// \tparam MIN is the starting index of the tuple +/// \tparam Is is [0 to sizeof...(tuple elements)) +template +struct RangeBuilder { + typedef Index_list type; +}; + +/// Induction step: Specialisation of the RangeBuilder class when N!=MIN +/// in this case we are recursively subtracting the N by one and adding one +/// index to Is... list until MIN==N +/// \tparam MIN is the starting index in the tuple +/// \tparam N represents sizeof..(elements)- sizeof...(Is) +/// \tparam Is... are the list of generated index so far +template +struct RangeBuilder : public RangeBuilder {}; + +/// \brief IndexRange that returns a [MIN, MAX) index range +/// \tparam MIN is the starting index in the tuple +/// \tparam MAX is the size of the tuple +template +using Index_range = typename RangeBuilder::type; + +/// append_impl +/// \brief unpacking the elements of the input tuple t and creating a new tuple +/// by adding element a at the end of it. +/// \tparam Args... the type of the elements inside the tuple t +/// \tparam T the type of the new element going to be added at the end of tuple +/// \tparam I... is the list of index from [0 to sizeof...(t)) +/// \param t the tuple on which we want to append a. +/// \param a the new elements going to be added to the tuple +/// \return Tuple +template +Tuple append_impl(utility::tuple::Tuple t, T a, + utility::tuple::Index_list) { + return utility::tuple::make_tuple(get(t)..., a); +} + +/// append +/// \brief the deduction function for \ref append_impl that automatically +/// generate the \ref Index_range +/// \tparam Args... the type of the elements inside the tuple t +/// \tparam T the type of the new element going to be added at the end of tuple +/// \param t the tuple on which we want to append a. 
+/// \param a the new elements going to be added to the tuple +/// \return Tuple +template +Tuple append(Tuple t, T a) { + return utility::tuple::append_impl( + t, a, utility::tuple::Index_range<0, sizeof...(Args)>()); +} + +/// append_impl +/// \brief This is an specialised of \ref append_impl when we want to +/// concatenate +/// tuple t2 at the end of the tuple t1. Here we unpack both tuples, generate +/// the +/// Index_range for each of them and create an output tuple T that contains both +/// elements of t1 and t2. +/// \tparam Args1... the type of the elements inside the tuple t1 +/// \tparam Args2... the type of the elements inside the tuple t2 +/// \tparam I1... is the list of index from [0 to sizeof...(t1)) +/// \tparam I2... is the list of index from [0 to sizeof...(t2)) +/// \param t1 is the tuple on which we want to append t2. +/// \param t2 is the tuple that is going to be added on t1. +/// \return Tuple +template +Tuple append_impl(utility::tuple::Tuple t1, + utility::tuple::Tuple t2, + utility::tuple::Index_list, + utility::tuple::Index_list) { + return utility::tuple::make_tuple(utility::tuple::get(t1)..., + utility::tuple::get(t2)...); +} +/// append +/// \brief deduction function for \ref append_impl when we are appending tuple +/// t1 by tuple t2. In this case the \ref Index_range for both tuple are +/// automatically generated. +/// \tparam Args1... the type of the elements inside the tuple t1 +/// \tparam Args2... the type of the elements inside the tuple t2 +/// \param t1 is the tuple on which we want to append t2. +/// \param t2 is the tuple that is going to be added on t1. 
+/// \return Tuple +template +Tuple append(utility::tuple::Tuple t1, + utility::tuple::Tuple t2) { + return utility::tuple::append_impl( + t1, t2, utility::tuple::Index_range<0, sizeof...(Args1)>(), + utility::tuple::Index_range<0, sizeof...(Args2)>()); +} +} // tuple +} // utility +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSORSYCL_TUPLE_HPP diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h index b7597b3a5..62c5caf6c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h @@ -56,11 +56,12 @@ struct traits > Options = Options_, Flags = compute_tensor_flags::ret | (is_const::value ? 0 : LvalueBit) }; + template using MakePointer = MakePointer; }; -template -struct traits > +template class MakePointer_> +struct traits > { typedef Scalar_ Scalar; typedef Dense StorageKind; @@ -71,11 +72,12 @@ struct traits > Options = Options_, Flags = compute_tensor_flags::ret | (is_const::value ? 0: LvalueBit) }; + template using MakePointer = MakePointer_; }; -template -struct traits > +template class MakePointer_> +struct traits > : public traits { typedef traits BaseTraits; @@ -88,6 +90,7 @@ struct traits > Options = Options_, Flags = BaseTraits::Flags }; + template using MakePointer = MakePointer_; }; template diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 6188b421a..de9b5243a 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -138,6 +138,13 @@ endif() endif() if(EIGEN_TEST_CXX11) + if(EIGEN_TEST_SYCL) + ei_add_test_sycl(cxx11_tensor_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_sycl_forced_eval "-std=c++11") + ei_add_test_sycl(cxx11_tensor_sycl_broadcast "-std=c++11") + ei_add_test_sycl(cxx11_tensor_sycl_device "-std=c++11") + endif(EIGEN_TEST_SYCL) + # It should be safe to always run these tests as there is some fallback code for # older compiler that don't support cxx11. 
set(CMAKE_CXX_STANDARD 11) diff --git a/unsupported/test/cxx11_tensor_sycl.cpp b/unsupported/test/cxx11_tensor_sycl.cpp new file mode 100644 index 000000000..1ec9b1883 --- /dev/null +++ b/unsupported/test/cxx11_tensor_sycl.cpp @@ -0,0 +1,157 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_sycl +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_SYCL + +#include "main.h" +#include + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; + +// Types used in tests: +using TestTensor = Tensor; +using TestTensorMap = TensorMap>; + +void test_sycl_cpu() { + cl::sycl::gpu_selector s; + cl::sycl::queue q(s, [=](cl::sycl::exception_list l) { + for (const auto& e : l) { + try { + std::rethrow_exception(e); + } catch (cl::sycl::exception e) { + std::cout << e.what() << std::endl; + } + } + }); + SyclDevice sycl_device(q); + + int sizeDim1 = 100; + int sizeDim2 = 100; + int sizeDim3 = 100; + array tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; + TestTensor in1(tensorRange); + TestTensor in2(tensorRange); + TestTensor in3(tensorRange); + TestTensor out(tensorRange); + in1 = in1.random(); + in2 = in2.random(); + in3 = in3.random(); + TestTensorMap gpu_in1(in1.data(), tensorRange); + TestTensorMap gpu_in2(in2.data(), tensorRange); + TestTensorMap gpu_in3(in3.data(), tensorRange); + TestTensorMap gpu_out(out.data(), tensorRange); + + /// a=1.2f + gpu_in1.device(sycl_device) = gpu_in1.constant(1.2f); + sycl_device.deallocate(in1.data()); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < 
sizeDim3; ++k) { + VERIFY_IS_APPROX(in1(i,j,k), 1.2f); + } + } + } + printf("a=1.2f Test passed\n"); + + /// a=b*1.2f + gpu_out.device(sycl_device) = gpu_in1 * 1.2f; + sycl_device.deallocate(out.data()); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i,j,k), + in1(i,j,k) * 1.2f); + } + } + } + printf("a=b*1.2f Test Passed\n"); + + /// c=a*b + gpu_out.device(sycl_device) = gpu_in1 * gpu_in2; + sycl_device.deallocate(out.data()); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i,j,k), + in1(i,j,k) * + in2(i,j,k)); + } + } + } + printf("c=a*b Test Passed\n"); + + /// c=a+b + gpu_out.device(sycl_device) = gpu_in1 + gpu_in2; + sycl_device.deallocate(out.data()); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i,j,k), + in1(i,j,k) + + in2(i,j,k)); + } + } + } + printf("c=a+b Test Passed\n"); + + /// c=a*a + gpu_out.device(sycl_device) = gpu_in1 * gpu_in1; + sycl_device.deallocate(out.data()); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i,j,k), + in1(i,j,k) * + in1(i,j,k)); + } + } + } + + printf("c= a*a Test Passed\n"); + + //a*3.14f + b*2.7f + gpu_out.device(sycl_device) = gpu_in1 * gpu_in1.constant(3.14f) + gpu_in2 * gpu_in2.constant(2.7f); + sycl_device.deallocate(out.data()); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i,j,k), + in1(i,j,k) * 3.14f + + in2(i,j,k) * 2.7f); + } + } + } + printf("a*3.14f + b*2.7f Test Passed\n"); + + ///d= (a>0.5? 
b:c) + gpu_out.device(sycl_device) =(gpu_in1 > gpu_in1.constant(0.5f)).select(gpu_in2, gpu_in3); + sycl_device.deallocate(out.data()); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i, j, k), (in1(i, j, k) > 0.5f) + ? in2(i, j, k) + : in3(i, j, k)); + } + } + } + printf("d= (a>0.5? b:c) Test Passed\n"); + +} +void test_cxx11_tensor_sycl() { + CALL_SUBTEST(test_sycl_cpu()); +} diff --git a/unsupported/test/cxx11_tensor_sycl_broadcast.cpp b/unsupported/test/cxx11_tensor_sycl_broadcast.cpp new file mode 100644 index 000000000..1babbc038 --- /dev/null +++ b/unsupported/test/cxx11_tensor_sycl_broadcast.cpp @@ -0,0 +1,76 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_sycl_broadcast +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_SYCL + +#include "main.h" +#include + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; + +// Types used in tests: +using TestTensor = Tensor; +using TestTensorMap = TensorMap>; +static void test_sycl_broadcast(){ + + cl::sycl::gpu_selector s; + cl::sycl::queue q(s, [=](cl::sycl::exception_list l) { + for (const auto& e : l) { + try { + std::rethrow_exception(e); + } catch (cl::sycl::exception e) { + std::cout << e.what() << std::endl; + } + } + }); + SyclDevice sycl_device(q); + // BROADCAST test: + array in_range = {{2, 3, 5, 7}}; + array broadcasts = {{2, 3, 1, 4}}; + array out_range; // = in_range * broadcasts + for (size_t i = 0; i < out_range.size(); ++i) + out_range[i] = in_range[i] * broadcasts[i]; + + Tensor input(in_range); + Tensor output(out_range); + + for (int i = 0; i < input.size(); ++i) + input(i) = static_cast(i); + + TensorMap gpu_in(input.data(), in_range); + TensorMap gpu_out(output.data(), out_range); + gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts); + sycl_device.deallocate(output.data()); + + for (size_t i = 0; i < in_range.size(); ++i) + VERIFY_IS_EQUAL(output.dimension(i), out_range[i]); + + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 9; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 28; ++l) { + VERIFY_IS_APPROX(input(i%2,j%3,k%5,l%7), output(i,j,k,l)); + } + } + } + } + printf("Broadcast Test Passed\n"); +} + +void test_cxx11_tensor_sycl_broadcast() { + CALL_SUBTEST(test_sycl_broadcast()); +} diff --git a/unsupported/test/cxx11_tensor_sycl_device.cpp b/unsupported/test/cxx11_tensor_sycl_device.cpp new file mode 100644 index 000000000..2c1c17972 --- /dev/null +++ b/unsupported/test/cxx11_tensor_sycl_device.cpp @@ -0,0 +1,37 @@ +// This file is part of Eigen, a lightweight C++ 
template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_sycl_device +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_SYCL + +#include "main.h" +#include + + +void test_sycl_device() { + cl::sycl::gpu_selector s; + cl::sycl::queue q(s, [=](cl::sycl::exception_list l) { + for (const auto& e : l) { + try { + std::rethrow_exception(e); + } catch (cl::sycl::exception e) { + std::cout << e.what() << std::endl; + } + } + }); + SyclDevice sycl_device(q); + printf("Helo from ComputeCpp: Device Exists\n"); +} +void test_cxx11_tensor_sycl_device() { + CALL_SUBTEST(test_sycl_device()); +} diff --git a/unsupported/test/cxx11_tensor_sycl_forced_eval.cpp b/unsupported/test/cxx11_tensor_sycl_forced_eval.cpp new file mode 100644 index 000000000..ee934d4fa --- /dev/null +++ b/unsupported/test/cxx11_tensor_sycl_forced_eval.cpp @@ -0,0 +1,64 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_sycl_forced_eval +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_SYCL + +#include "main.h" +#include + +using Eigen::Tensor; + +void test_sycl_gpu() { + cl::sycl::gpu_selector s; + cl::sycl::queue q(s, [=](cl::sycl::exception_list l) { + for (const auto& e : l) { + try { + std::rethrow_exception(e); + } catch (cl::sycl::exception e) { + std::cout << e.what() << std::endl; + } + } + }); + SyclDevice sycl_device(q); + + int sizeDim1 = 100; + int sizeDim2 = 200; + int sizeDim3 = 200; + Eigen::array tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; + Eigen::Tensor in1(tensorRange); + Eigen::Tensor in2(tensorRange); + Eigen::Tensor out(tensorRange); + + in1 = in1.random() + in1.constant(10.0f); + in2 = in2.random() + in2.constant(10.0f); + + // creating TensorMap from tensor + Eigen::TensorMap> gpu_in1(in1.data(), tensorRange); + Eigen::TensorMap> gpu_in2(in2.data(), tensorRange); + Eigen::TensorMap> gpu_out(out.data(), tensorRange); + + /// c=(a+b)*b + gpu_out.device(sycl_device) =(gpu_in1 + gpu_in2).eval() * gpu_in2; + sycl_device.deallocate(out.data()); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i, j, k), + (in1(i, j, k) + in2(i, j, k)) * in2(i, j, k)); + } + } + } + printf("(a+b)*b Test Passed\n"); +} + +void test_cxx11_tensor_sycl_forced_eval() { CALL_SUBTEST(test_sycl_gpu()); } -- cgit v1.2.3 From c771df6bc32991a0990931ad39cb50b65a20ffbb Mon Sep 17 00:00:00 2001 From: Luke Iwanski Date: Mon, 19 Sep 2016 14:09:25 +0100 Subject: Updated the owners of the file. 
--- unsupported/test/cxx11_tensor_sycl.cpp | 7 +++++-- unsupported/test/cxx11_tensor_sycl_broadcast.cpp | 7 +++++-- unsupported/test/cxx11_tensor_sycl_device.cpp | 8 +++++--- unsupported/test/cxx11_tensor_sycl_forced_eval.cpp | 6 +++++- 4 files changed, 20 insertions(+), 8 deletions(-) diff --git a/unsupported/test/cxx11_tensor_sycl.cpp b/unsupported/test/cxx11_tensor_sycl.cpp index 1ec9b1883..a4dc9f9fa 100644 --- a/unsupported/test/cxx11_tensor_sycl.cpp +++ b/unsupported/test/cxx11_tensor_sycl.cpp @@ -1,13 +1,16 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2016 Benoit Steiner +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX #define EIGEN_TEST_FUNC cxx11_tensor_sycl diff --git a/unsupported/test/cxx11_tensor_sycl_broadcast.cpp b/unsupported/test/cxx11_tensor_sycl_broadcast.cpp index 1babbc038..5da77981c 100644 --- a/unsupported/test/cxx11_tensor_sycl_broadcast.cpp +++ b/unsupported/test/cxx11_tensor_sycl_broadcast.cpp @@ -1,13 +1,16 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2016 Benoit Steiner +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
- #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX #define EIGEN_TEST_FUNC cxx11_tensor_sycl_broadcast diff --git a/unsupported/test/cxx11_tensor_sycl_device.cpp b/unsupported/test/cxx11_tensor_sycl_device.cpp index 2c1c17972..6bb39cf6a 100644 --- a/unsupported/test/cxx11_tensor_sycl_device.cpp +++ b/unsupported/test/cxx11_tensor_sycl_device.cpp @@ -1,13 +1,16 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2016 Benoit Steiner +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX #define EIGEN_TEST_FUNC cxx11_tensor_sycl_device @@ -17,7 +20,6 @@ #include "main.h" #include - void test_sycl_device() { cl::sycl::gpu_selector s; cl::sycl::queue q(s, [=](cl::sycl::exception_list l) { diff --git a/unsupported/test/cxx11_tensor_sycl_forced_eval.cpp b/unsupported/test/cxx11_tensor_sycl_forced_eval.cpp index ee934d4fa..59fe743e0 100644 --- a/unsupported/test/cxx11_tensor_sycl_forced_eval.cpp +++ b/unsupported/test/cxx11_tensor_sycl_forced_eval.cpp @@ -1,7 +1,11 @@ // This file is part of Eigen, a lightweight C++ template library // for linear algebra. // -// Copyright (C) 2016 Benoit Steiner +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. 
If a copy of the MPL was not distributed -- cgit v1.2.3 From 00d4e65f00f6024a59e1d0b0d066a14b80f5e390 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 23 Sep 2016 16:44:45 -0700 Subject: Deleted unused TensorMap data member --- unsupported/Eigen/CXX11/src/Tensor/TensorMap.h | 1 - 1 file changed, 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h index 298a49138..a8e55757e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorMap.h @@ -314,7 +314,6 @@ template class MakePoin private: typename MakePointer_::Type m_data; Dimensions m_dimensions; - size_t is_coverted= size_t(0); }; } // end namespace Eigen -- cgit v1.2.3 From f6ac51a054975475ca1181acdeeaac677c07525c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 23 Sep 2016 16:45:17 -0700 Subject: Made TensorEvalTo compatible with c++0x again. --- unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h index 3dab6da99..9d5cf1a8c 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorEvalTo.h @@ -119,7 +119,7 @@ struct TensorEvaluator, Device> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ~TensorEvaluator() { } - typedef typename internal::traits>::template MakePointer::Type DevicePointer; + typedef typename internal::traits >::template MakePointer::Type DevicePointer; EIGEN_DEVICE_FUNC const Dimensions& dimensions() const { return m_impl.dimensions(); } EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(DevicePointer scalar) { -- cgit v1.2.3 From dd602e62c80ede4e193ccb93e395645f0f28e54b Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Tue, 27 Sep 2016 16:21:19 +0100 Subject: Converting alias template to nested struct in order to be compatible with CXX-03 --- 
unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h index 62c5caf6c..6ffa1f5cc 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h @@ -56,7 +56,9 @@ struct traits > Options = Options_, Flags = compute_tensor_flags::ret | (is_const::value ? 0 : LvalueBit) }; - template using MakePointer = MakePointer; + template struct MakePointer{ + typedef T* Type; + }; }; @@ -72,7 +74,9 @@ struct traits::ret | (is_const::value ? 0: LvalueBit) }; - template using MakePointer = MakePointer_; + template struct MakePointer{ + typedef typename MakePointer_::Type Type; + }; }; @@ -90,7 +94,9 @@ struct traits > Options = Options_, Flags = BaseTraits::Flags }; - template using MakePointer = MakePointer_; + template struct MakePointer{ + typedef typename MakePointer_::Type Type; + }; }; template -- cgit v1.2.3 From 422530946f437b6cfb73a09d3932bc0f3ac8af80 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 30 Sep 2016 08:22:10 -0700 Subject: Renamed the SYCL tests to follow the standard naming convention. 
--- unsupported/test/CMakeLists.txt | 15 ++-- unsupported/test/cxx11_tensor_broadcast_sycl.cpp | 79 ++++++++++++++++++++++ unsupported/test/cxx11_tensor_device_sycl.cpp | 39 +++++++++++ unsupported/test/cxx11_tensor_forced_eval_sycl.cpp | 68 +++++++++++++++++++ unsupported/test/cxx11_tensor_sycl_broadcast.cpp | 79 ---------------------- unsupported/test/cxx11_tensor_sycl_device.cpp | 39 ----------- unsupported/test/cxx11_tensor_sycl_forced_eval.cpp | 68 ------------------- 7 files changed, 194 insertions(+), 193 deletions(-) create mode 100644 unsupported/test/cxx11_tensor_broadcast_sycl.cpp create mode 100644 unsupported/test/cxx11_tensor_device_sycl.cpp create mode 100644 unsupported/test/cxx11_tensor_forced_eval_sycl.cpp delete mode 100644 unsupported/test/cxx11_tensor_sycl_broadcast.cpp delete mode 100644 unsupported/test/cxx11_tensor_sycl_device.cpp delete mode 100644 unsupported/test/cxx11_tensor_sycl_forced_eval.cpp diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt index 0d7ed1db2..113dd79c1 100644 --- a/unsupported/test/CMakeLists.txt +++ b/unsupported/test/CMakeLists.txt @@ -140,13 +140,6 @@ endif() endif() if(EIGEN_TEST_CXX11) - if(EIGEN_TEST_SYCL) - ei_add_test_sycl(cxx11_tensor_sycl "-std=c++11") - ei_add_test_sycl(cxx11_tensor_sycl_forced_eval "-std=c++11") - ei_add_test_sycl(cxx11_tensor_sycl_broadcast "-std=c++11") - ei_add_test_sycl(cxx11_tensor_sycl_device "-std=c++11") - endif(EIGEN_TEST_SYCL) - # It should be safe to always run these tests as there is some fallback code for # older compiler that don't support cxx11. 
set(CMAKE_CXX_STANDARD 11) @@ -254,3 +247,11 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA) unset(EIGEN_ADD_TEST_FILENAME_EXTENSION) endif() + + +if(EIGEN_TEST_SYCL) + ei_add_test_sycl(cxx11_tensor_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_forced_eval_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_broadcast_sycl "-std=c++11") + ei_add_test_sycl(cxx11_tensor_device_sycl "-std=c++11") +endif(EIGEN_TEST_SYCL) \ No newline at end of file diff --git a/unsupported/test/cxx11_tensor_broadcast_sycl.cpp b/unsupported/test/cxx11_tensor_broadcast_sycl.cpp new file mode 100644 index 000000000..5da77981c --- /dev/null +++ b/unsupported/test/cxx11_tensor_broadcast_sycl.cpp @@ -0,0 +1,79 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_sycl_broadcast +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_SYCL + +#include "main.h" +#include + +using Eigen::array; +using Eigen::SyclDevice; +using Eigen::Tensor; +using Eigen::TensorMap; + +// Types used in tests: +using TestTensor = Tensor; +using TestTensorMap = TensorMap>; +static void test_sycl_broadcast(){ + + cl::sycl::gpu_selector s; + cl::sycl::queue q(s, [=](cl::sycl::exception_list l) { + for (const auto& e : l) { + try { + std::rethrow_exception(e); + } catch (cl::sycl::exception e) { + std::cout << e.what() << std::endl; + } + } + }); + SyclDevice sycl_device(q); + // BROADCAST test: + array in_range = {{2, 3, 5, 7}}; + array broadcasts = {{2, 3, 1, 4}}; + array out_range; // = in_range * broadcasts + for (size_t i = 0; i < out_range.size(); ++i) + out_range[i] = in_range[i] * broadcasts[i]; + + Tensor input(in_range); + Tensor output(out_range); + + for (int i = 0; i < input.size(); ++i) + input(i) = static_cast(i); + + TensorMap gpu_in(input.data(), in_range); + TensorMap gpu_out(output.data(), out_range); + gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts); + sycl_device.deallocate(output.data()); + + for (size_t i = 0; i < in_range.size(); ++i) + VERIFY_IS_EQUAL(output.dimension(i), out_range[i]); + + for (int i = 0; i < 4; ++i) { + for (int j = 0; j < 9; ++j) { + for (int k = 0; k < 5; ++k) { + for (int l = 0; l < 28; ++l) { + VERIFY_IS_APPROX(input(i%2,j%3,k%5,l%7), output(i,j,k,l)); + } + } + } + } + printf("Broadcast Test Passed\n"); +} + +void test_cxx11_tensor_sycl_broadcast() { + CALL_SUBTEST(test_sycl_broadcast()); +} diff --git a/unsupported/test/cxx11_tensor_device_sycl.cpp b/unsupported/test/cxx11_tensor_device_sycl.cpp new file mode 100644 index 000000000..6bb39cf6a --- /dev/null +++ b/unsupported/test/cxx11_tensor_device_sycl.cpp @@ -0,0 +1,39 @@ +// This file is part of Eigen, a lightweight C++ 
template library +// for linear algebra. +// +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_sycl_device +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_SYCL + +#include "main.h" +#include + +void test_sycl_device() { + cl::sycl::gpu_selector s; + cl::sycl::queue q(s, [=](cl::sycl::exception_list l) { + for (const auto& e : l) { + try { + std::rethrow_exception(e); + } catch (cl::sycl::exception e) { + std::cout << e.what() << std::endl; + } + } + }); + SyclDevice sycl_device(q); + printf("Helo from ComputeCpp: Device Exists\n"); +} +void test_cxx11_tensor_sycl_device() { + CALL_SUBTEST(test_sycl_device()); +} diff --git a/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp b/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp new file mode 100644 index 000000000..59fe743e0 --- /dev/null +++ b/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp @@ -0,0 +1,68 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 +// Mehdi Goli Codeplay Software Ltd. +// Ralph Potter Codeplay Software Ltd. +// Luke Iwanski Codeplay Software Ltd. +// Contact: +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#define EIGEN_TEST_NO_LONGDOUBLE +#define EIGEN_TEST_NO_COMPLEX +#define EIGEN_TEST_FUNC cxx11_tensor_sycl_forced_eval +#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int +#define EIGEN_USE_SYCL + +#include "main.h" +#include + +using Eigen::Tensor; + +void test_sycl_gpu() { + cl::sycl::gpu_selector s; + cl::sycl::queue q(s, [=](cl::sycl::exception_list l) { + for (const auto& e : l) { + try { + std::rethrow_exception(e); + } catch (cl::sycl::exception e) { + std::cout << e.what() << std::endl; + } + } + }); + SyclDevice sycl_device(q); + + int sizeDim1 = 100; + int sizeDim2 = 200; + int sizeDim3 = 200; + Eigen::array tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; + Eigen::Tensor in1(tensorRange); + Eigen::Tensor in2(tensorRange); + Eigen::Tensor out(tensorRange); + + in1 = in1.random() + in1.constant(10.0f); + in2 = in2.random() + in2.constant(10.0f); + + // creating TensorMap from tensor + Eigen::TensorMap> gpu_in1(in1.data(), tensorRange); + Eigen::TensorMap> gpu_in2(in2.data(), tensorRange); + Eigen::TensorMap> gpu_out(out.data(), tensorRange); + + /// c=(a+b)*b + gpu_out.device(sycl_device) =(gpu_in1 + gpu_in2).eval() * gpu_in2; + sycl_device.deallocate(out.data()); + for (int i = 0; i < sizeDim1; ++i) { + for (int j = 0; j < sizeDim2; ++j) { + for (int k = 0; k < sizeDim3; ++k) { + VERIFY_IS_APPROX(out(i, j, k), + (in1(i, j, k) + in2(i, j, k)) * in2(i, j, k)); + } + } + } + printf("(a+b)*b Test Passed\n"); +} + +void test_cxx11_tensor_sycl_forced_eval() { CALL_SUBTEST(test_sycl_gpu()); } diff --git a/unsupported/test/cxx11_tensor_sycl_broadcast.cpp b/unsupported/test/cxx11_tensor_sycl_broadcast.cpp deleted file mode 100644 index 5da77981c..000000000 --- a/unsupported/test/cxx11_tensor_sycl_broadcast.cpp +++ /dev/null @@ -1,79 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. 
-// Luke Iwanski Codeplay Software Ltd. -// Contact: -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_sycl_broadcast -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int -#define EIGEN_USE_SYCL - -#include "main.h" -#include - -using Eigen::array; -using Eigen::SyclDevice; -using Eigen::Tensor; -using Eigen::TensorMap; - -// Types used in tests: -using TestTensor = Tensor; -using TestTensorMap = TensorMap>; -static void test_sycl_broadcast(){ - - cl::sycl::gpu_selector s; - cl::sycl::queue q(s, [=](cl::sycl::exception_list l) { - for (const auto& e : l) { - try { - std::rethrow_exception(e); - } catch (cl::sycl::exception e) { - std::cout << e.what() << std::endl; - } - } - }); - SyclDevice sycl_device(q); - // BROADCAST test: - array in_range = {{2, 3, 5, 7}}; - array broadcasts = {{2, 3, 1, 4}}; - array out_range; // = in_range * broadcasts - for (size_t i = 0; i < out_range.size(); ++i) - out_range[i] = in_range[i] * broadcasts[i]; - - Tensor input(in_range); - Tensor output(out_range); - - for (int i = 0; i < input.size(); ++i) - input(i) = static_cast(i); - - TensorMap gpu_in(input.data(), in_range); - TensorMap gpu_out(output.data(), out_range); - gpu_out.device(sycl_device) = gpu_in.broadcast(broadcasts); - sycl_device.deallocate(output.data()); - - for (size_t i = 0; i < in_range.size(); ++i) - VERIFY_IS_EQUAL(output.dimension(i), out_range[i]); - - for (int i = 0; i < 4; ++i) { - for (int j = 0; j < 9; ++j) { - for (int k = 0; k < 5; ++k) { - for (int l = 0; l < 28; ++l) { - VERIFY_IS_APPROX(input(i%2,j%3,k%5,l%7), output(i,j,k,l)); - } - } - } - } - printf("Broadcast Test Passed\n"); -} - -void test_cxx11_tensor_sycl_broadcast() { - CALL_SUBTEST(test_sycl_broadcast()); -} diff --git 
a/unsupported/test/cxx11_tensor_sycl_device.cpp b/unsupported/test/cxx11_tensor_sycl_device.cpp deleted file mode 100644 index 6bb39cf6a..000000000 --- a/unsupported/test/cxx11_tensor_sycl_device.cpp +++ /dev/null @@ -1,39 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_sycl_device -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int -#define EIGEN_USE_SYCL - -#include "main.h" -#include - -void test_sycl_device() { - cl::sycl::gpu_selector s; - cl::sycl::queue q(s, [=](cl::sycl::exception_list l) { - for (const auto& e : l) { - try { - std::rethrow_exception(e); - } catch (cl::sycl::exception e) { - std::cout << e.what() << std::endl; - } - } - }); - SyclDevice sycl_device(q); - printf("Helo from ComputeCpp: Device Exists\n"); -} -void test_cxx11_tensor_sycl_device() { - CALL_SUBTEST(test_sycl_device()); -} diff --git a/unsupported/test/cxx11_tensor_sycl_forced_eval.cpp b/unsupported/test/cxx11_tensor_sycl_forced_eval.cpp deleted file mode 100644 index 59fe743e0..000000000 --- a/unsupported/test/cxx11_tensor_sycl_forced_eval.cpp +++ /dev/null @@ -1,68 +0,0 @@ -// This file is part of Eigen, a lightweight C++ template library -// for linear algebra. -// -// Copyright (C) 2016 -// Mehdi Goli Codeplay Software Ltd. -// Ralph Potter Codeplay Software Ltd. -// Luke Iwanski Codeplay Software Ltd. -// Contact: -// -// This Source Code Form is subject to the terms of the Mozilla -// Public License v. 2.0. 
If a copy of the MPL was not distributed -// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. - -#define EIGEN_TEST_NO_LONGDOUBLE -#define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_sycl_forced_eval -#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int -#define EIGEN_USE_SYCL - -#include "main.h" -#include - -using Eigen::Tensor; - -void test_sycl_gpu() { - cl::sycl::gpu_selector s; - cl::sycl::queue q(s, [=](cl::sycl::exception_list l) { - for (const auto& e : l) { - try { - std::rethrow_exception(e); - } catch (cl::sycl::exception e) { - std::cout << e.what() << std::endl; - } - } - }); - SyclDevice sycl_device(q); - - int sizeDim1 = 100; - int sizeDim2 = 200; - int sizeDim3 = 200; - Eigen::array tensorRange = {{sizeDim1, sizeDim2, sizeDim3}}; - Eigen::Tensor in1(tensorRange); - Eigen::Tensor in2(tensorRange); - Eigen::Tensor out(tensorRange); - - in1 = in1.random() + in1.constant(10.0f); - in2 = in2.random() + in2.constant(10.0f); - - // creating TensorMap from tensor - Eigen::TensorMap> gpu_in1(in1.data(), tensorRange); - Eigen::TensorMap> gpu_in2(in2.data(), tensorRange); - Eigen::TensorMap> gpu_out(out.data(), tensorRange); - - /// c=(a+b)*b - gpu_out.device(sycl_device) =(gpu_in1 + gpu_in2).eval() * gpu_in2; - sycl_device.deallocate(out.data()); - for (int i = 0; i < sizeDim1; ++i) { - for (int j = 0; j < sizeDim2; ++j) { - for (int k = 0; k < sizeDim3; ++k) { - VERIFY_IS_APPROX(out(i, j, k), - (in1(i, j, k) + in2(i, j, k)) * in2(i, j, k)); - } - } - } - printf("(a+b)*b Test Passed\n"); -} - -void test_cxx11_tensor_sycl_forced_eval() { CALL_SUBTEST(test_sycl_gpu()); } -- cgit v1.2.3 From c84084c0c0397027d97a3775c068f380bd2cb485 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 5 Oct 2016 14:15:41 -0700 Subject: Fixed compilation warning --- unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h 
b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h index 6ffa1f5cc..b540b722e 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorTraits.h @@ -28,7 +28,7 @@ class compute_tensor_flags #else 0 #endif - || + | #if EIGEN_MAX_ALIGN_BYTES>0 is_dynamic_size_storage #else -- cgit v1.2.3 From 73b00129451f53a3a701397617c765ec2eb87851 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 5 Oct 2016 14:24:24 -0700 Subject: Fixed compilation warnings --- unsupported/test/cxx11_tensor_device_sycl.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unsupported/test/cxx11_tensor_device_sycl.cpp b/unsupported/test/cxx11_tensor_device_sycl.cpp index 6bb39cf6a..b9368cb84 100644 --- a/unsupported/test/cxx11_tensor_device_sycl.cpp +++ b/unsupported/test/cxx11_tensor_device_sycl.cpp @@ -13,7 +13,7 @@ #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_sycl_device +#define EIGEN_TEST_FUNC cxx11_tensor_device_sycl #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_SYCL @@ -34,6 +34,6 @@ void test_sycl_device() { SyclDevice sycl_device(q); printf("Helo from ComputeCpp: Device Exists\n"); } -void test_cxx11_tensor_sycl_device() { +void test_cxx11_tensor_device_sycl() { CALL_SUBTEST(test_sycl_device()); } -- cgit v1.2.3 From d7f9679a34ef991fa3c9da8f61510d6c48aaa19c Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 5 Oct 2016 15:00:32 -0700 Subject: Fixed a couple of compilation warnings --- unsupported/test/cxx11_tensor_broadcast_sycl.cpp | 4 ++-- unsupported/test/cxx11_tensor_forced_eval_sycl.cpp | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/unsupported/test/cxx11_tensor_broadcast_sycl.cpp b/unsupported/test/cxx11_tensor_broadcast_sycl.cpp index 5da77981c..0179ab243 100644 --- a/unsupported/test/cxx11_tensor_broadcast_sycl.cpp +++ b/unsupported/test/cxx11_tensor_broadcast_sycl.cpp @@ -13,7 +13,7 @@ #define 
EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_sycl_broadcast +#define EIGEN_TEST_FUNC cxx11_tensor_broadcast_sycl #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_SYCL @@ -74,6 +74,6 @@ static void test_sycl_broadcast(){ printf("Broadcast Test Passed\n"); } -void test_cxx11_tensor_sycl_broadcast() { +void test_cxx11_tensor_broadcast_sycl() { CALL_SUBTEST(test_sycl_broadcast()); } diff --git a/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp b/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp index 59fe743e0..7c4446192 100644 --- a/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp +++ b/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp @@ -13,7 +13,7 @@ #define EIGEN_TEST_NO_LONGDOUBLE #define EIGEN_TEST_NO_COMPLEX -#define EIGEN_TEST_FUNC cxx11_tensor_sycl_forced_eval +#define EIGEN_TEST_FUNC cxx11_tensor_forced_eval_sycl #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int #define EIGEN_USE_SYCL @@ -65,4 +65,6 @@ void test_sycl_gpu() { printf("(a+b)*b Test Passed\n"); } -void test_cxx11_tensor_sycl_forced_eval() { CALL_SUBTEST(test_sycl_gpu()); } +void test_cxx11_tensor_forced_eval_sycl() { + CALL_SUBTEST(test_sycl_gpu()); +} -- cgit v1.2.3 From 48c635e22335569465d8977421b614372d8bd852 Mon Sep 17 00:00:00 2001 From: Rasmus Munk Larsen Date: Thu, 6 Oct 2016 10:33:10 -0700 Subject: Add a simple cost model to prevent Eigen's parallel GEMM from using too many threads when the inner dimension is small. Timing for square matrices is unchanged, but both CPU and Wall time are significantly improved for skinny matrices. The benchmarks below are for multiplying NxK * KxN matrices with test names of the form BM_OuterishProd/N/K. 
Improvements in Wall time: Run on [redacted] (12 X 3501 MHz CPUs); 2016-10-05T17:40:02.462497196-07:00 CPU: Intel Haswell with HyperThreading (6 cores) dL1:32KB dL2:256KB dL3:15MB Benchmark Base (ns) New (ns) Improvement ------------------------------------------------------------------ BM_OuterishProd/64/1 3088 1610 +47.9% BM_OuterishProd/64/4 3562 2414 +32.2% BM_OuterishProd/64/32 8861 7815 +11.8% BM_OuterishProd/128/1 11363 6504 +42.8% BM_OuterishProd/128/4 11128 9794 +12.0% BM_OuterishProd/128/64 27691 27396 +1.1% BM_OuterishProd/256/1 33214 28123 +15.3% BM_OuterishProd/256/4 34312 36818 -7.3% BM_OuterishProd/256/128 174866 176398 -0.9% BM_OuterishProd/512/1 7963684 104224 +98.7% BM_OuterishProd/512/4 7987913 112867 +98.6% BM_OuterishProd/512/256 8198378 1306500 +84.1% BM_OuterishProd/1k/1 7356256 324432 +95.6% BM_OuterishProd/1k/4 8129616 331621 +95.9% BM_OuterishProd/1k/512 27265418 7517538 +72.4% Improvements in CPU time: Run on [redacted] (12 X 3501 MHz CPUs); 2016-10-05T17:40:02.462497196-07:00 CPU: Intel Haswell with HyperThreading (6 cores) dL1:32KB dL2:256KB dL3:15MB Benchmark Base (ns) New (ns) Improvement ------------------------------------------------------------------ BM_OuterishProd/64/1 6169 1608 +73.9% BM_OuterishProd/64/4 7117 2412 +66.1% BM_OuterishProd/64/32 17702 15616 +11.8% BM_OuterishProd/128/1 45415 6498 +85.7% BM_OuterishProd/128/4 44459 9786 +78.0% BM_OuterishProd/128/64 110657 109489 +1.1% BM_OuterishProd/256/1 265158 28101 +89.4% BM_OuterishProd/256/4 274234 183885 +32.9% BM_OuterishProd/256/128 1397160 1408776 -0.8% BM_OuterishProd/512/1 78947048 520703 +99.3% BM_OuterishProd/512/4 86955578 1349742 +98.4% BM_OuterishProd/512/256 74701613 15584661 +79.1% BM_OuterishProd/1k/1 78352601 3877911 +95.1% BM_OuterishProd/1k/4 78521643 3966221 +94.9% BM_OuterishProd/1k/512 258104736 89480530 +65.3% --- Eigen/src/Core/products/GeneralMatrixMatrix.h | 50 +++++++++++++-------------- Eigen/src/Core/products/Parallelizer.h | 19 ++++++---- 2 files 
changed, 38 insertions(+), 31 deletions(-) diff --git a/Eigen/src/Core/products/GeneralMatrixMatrix.h b/Eigen/src/Core/products/GeneralMatrixMatrix.h index b1465c3b5..61df3be57 100644 --- a/Eigen/src/Core/products/GeneralMatrixMatrix.h +++ b/Eigen/src/Core/products/GeneralMatrixMatrix.h @@ -10,7 +10,7 @@ #ifndef EIGEN_GENERAL_MATRIX_MATRIX_H #define EIGEN_GENERAL_MATRIX_MATRIX_H -namespace Eigen { +namespace Eigen { namespace internal { @@ -24,7 +24,7 @@ template< struct general_matrix_matrix_product { typedef gebp_traits Traits; - + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static EIGEN_STRONG_INLINE void run( Index rows, Index cols, Index depth, @@ -54,7 +54,7 @@ struct general_matrix_matrix_product Traits; - + typedef typename ScalarBinaryOpTraits::ReturnType ResScalar; static void run(Index rows, Index cols, Index depth, const LhsScalar* _lhs, Index lhsStride, @@ -85,13 +85,13 @@ static void run(Index rows, Index cols, Index depth, // this is the parallel version! Index tid = omp_get_thread_num(); Index threads = omp_get_num_threads(); - + LhsScalar* blockA = blocking.blockA(); eigen_internal_assert(blockA!=0); - + std::size_t sizeB = kc*nc; ei_declare_aligned_stack_constructed_variable(RhsScalar, blockB, sizeB, 0); - + // For each horizontal panel of the rhs, and corresponding vertical panel of the lhs... for(Index k=0; k Pack lhs's panel into a sequential chunk of memory (L2/L3 caching) // Note that this panel will be read as many times as the number of blocks in the rhs's // horizontal panel which is, in practice, a very low number. pack_lhs(blockA, lhs.getSubMapper(i2,k2), actual_kc, actual_mc); - + // For each kc x nc block of the rhs's horizontal panel... 
for(Index j2=0; j2m_blockB = reinterpret_cast((internal::UIntPtr(m_staticB) + (EIGEN_DEFAULT_ALIGN_BYTES-1)) & ~std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)); #endif } - + void initParallel(Index, Index, Index, Index) {} @@ -359,14 +359,14 @@ class gemm_blocking_spacem_mc * this->m_kc; m_sizeB = this->m_kc * this->m_nc; } - + void initParallel(Index rows, Index cols, Index depth, Index num_threads) { this->m_mc = Transpose ? cols : rows; this->m_nc = Transpose ? rows : cols; this->m_kc = depth; - - eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0); + + eigen_internal_assert(this->m_blockA==0 && this->m_blockB==0); Index m = this->m_mc; computeProductBlockingSizes(this->m_kc, m, this->m_nc, num_threads); m_sizeA = this->m_mc * this->m_kc; @@ -401,7 +401,7 @@ class gemm_blocking_space struct generic_product_impl : generic_product_impl_base > @@ -409,21 +409,21 @@ struct generic_product_impl typedef typename Product::Scalar Scalar; typedef typename Lhs::Scalar LhsScalar; typedef typename Rhs::Scalar RhsScalar; - + typedef internal::blas_traits LhsBlasTraits; typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; typedef typename internal::remove_all::type ActualLhsTypeCleaned; - + typedef internal::blas_traits RhsBlasTraits; typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; typedef typename internal::remove_all::type ActualRhsTypeCleaned; - + enum { MaxDepthAtCompileTime = EIGEN_SIZE_MIN_PREFER_FIXED(Lhs::MaxColsAtCompileTime,Rhs::MaxRowsAtCompileTime) }; - + typedef generic_product_impl lazyproduct; - + template static void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { @@ -453,7 +453,7 @@ struct generic_product_impl else scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); } - + template static void scaleAndAddTo(Dest& dst, const Lhs& a_lhs, const Rhs& a_rhs, const Scalar& alpha) { @@ -481,7 +481,7 @@ struct generic_product_impl BlockingType blocking(dst.rows(), dst.cols(), lhs.cols(), 1, true); 
internal::parallelize_gemm<(Dest::MaxRowsAtCompileTime>32 || Dest::MaxRowsAtCompileTime==Dynamic)> - (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), Dest::Flags&RowMajorBit); + (GemmFunctor(lhs, rhs, dst, actualAlpha, blocking), a_lhs.rows(), a_rhs.cols(), a_lhs.cols(), Dest::Flags&RowMajorBit); } }; diff --git a/Eigen/src/Core/products/Parallelizer.h b/Eigen/src/Core/products/Parallelizer.h index e0bfcc356..1dee8d714 100644 --- a/Eigen/src/Core/products/Parallelizer.h +++ b/Eigen/src/Core/products/Parallelizer.h @@ -10,7 +10,7 @@ #ifndef EIGEN_PARALLELIZER_H #define EIGEN_PARALLELIZER_H -namespace Eigen { +namespace Eigen { namespace internal { @@ -83,7 +83,7 @@ template struct GemmParallelInfo }; template -void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpose) +void parallelize_gemm(const Functor& func, Index rows, Index cols, Index depth, bool transpose) { // TODO when EIGEN_USE_BLAS is defined, // we should still enable OMP for other scalar types @@ -92,6 +92,7 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos // the matrix product when multithreading is enabled. This is a temporary // fix to support row-major destination matrices. This whole // parallelizer mechanism has to be redisigned anyway. + EIGEN_UNUSED_VARIABLE(depth); EIGEN_UNUSED_VARIABLE(transpose); func(0,rows, 0,cols); #else @@ -106,6 +107,12 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos // FIXME this has to be fine tuned Index size = transpose ? rows : cols; Index pb_max_threads = std::max(1,size / 32); + // compute the maximal number of threads from the total amount of work: + double work = static_cast(rows) * static_cast(cols) * + static_cast(depth); + double kMinTaskSize = 50000; // Heuristic. 
+ max_threads = std::max(1, std::min(max_threads, work / kMinTaskSize)); + // compute the number of threads we are going to use Index threads = std::min(nbThreads(), pb_max_threads); @@ -120,19 +127,19 @@ void parallelize_gemm(const Functor& func, Index rows, Index cols, bool transpos if(transpose) std::swap(rows,cols); - + ei_declare_aligned_stack_constructed_variable(GemmParallelInfo,info,threads,0); - + #pragma omp parallel num_threads(threads) { Index i = omp_get_thread_num(); // Note that the actual number of threads might be lower than the number of request ones. Index actual_threads = omp_get_num_threads(); - + Index blockCols = (cols / actual_threads) & ~Index(0x3); Index blockRows = (rows / actual_threads); blockRows = (blockRows/Functor::Traits::mr)*Functor::Traits::mr; - + Index r0 = i*blockRows; Index actualBlockRows = (i+1==actual_threads) ? rows-r0 : blockRows; -- cgit v1.2.3 From 86711497c4584534793b186fb0c72f8002a9fe86 Mon Sep 17 00:00:00 2001 From: Robert Lukierski Date: Wed, 12 Oct 2016 16:35:17 +0100 Subject: Adding EIGEN_DEVICE_FUNC in the Geometry module. Additional CUDA necessary fixes in the Core (mostly usage of EIGEN_USING_STD_MATH). 
--- Eigen/src/Core/AssignEvaluator.h | 3 +- Eigen/src/Core/CwiseNullaryOp.h | 3 +- Eigen/src/Core/DiagonalMatrix.h | 2 +- Eigen/src/Core/MatrixBase.h | 4 +- Eigen/src/Core/PlainObjectBase.h | 10 +- Eigen/src/Core/TriangularMatrix.h | 13 ++- Eigen/src/Core/VectorwiseOp.h | 2 + Eigen/src/Core/functors/BinaryFunctors.h | 2 +- Eigen/src/Core/functors/UnaryFunctors.h | 2 +- Eigen/src/Geometry/AlignedBox.h | 80 +++++++------- Eigen/src/Geometry/AngleAxis.h | 56 +++++----- Eigen/src/Geometry/EulerAngles.h | 8 +- Eigen/src/Geometry/Homogeneous.h | 58 +++++----- Eigen/src/Geometry/Hyperplane.h | 54 +++++----- Eigen/src/Geometry/OrthoMethods.h | 9 +- Eigen/src/Geometry/ParametrizedLine.h | 54 +++++----- Eigen/src/Geometry/Quaternion.h | 162 ++++++++++++++-------------- Eigen/src/Geometry/Rotation2D.h | 48 ++++----- Eigen/src/Geometry/RotationBase.h | 40 +++---- Eigen/src/Geometry/Transform.h | 175 +++++++++++++++++-------------- Eigen/src/Geometry/Translation.h | 50 ++++----- 21 files changed, 434 insertions(+), 401 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index b7cc7c0e9..30b36be11 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -517,6 +517,7 @@ struct dense_assignment_loop { EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) { + EIGEN_USING_STD_MATH(min) typedef typename Kernel::Scalar Scalar; typedef typename Kernel::PacketType PacketType; enum { @@ -554,7 +555,7 @@ struct dense_assignment_loop for(Index inner = alignedEnd; inner((alignedStart+alignedStep)%packetSize, innerSize); + alignedStart = min((Index)(alignedStart+alignedStep)%packetSize, (Index)innerSize); } } }; diff --git a/Eigen/src/Core/CwiseNullaryOp.h b/Eigen/src/Core/CwiseNullaryOp.h index 25c3ef3d7..4ab32d430 100644 --- a/Eigen/src/Core/CwiseNullaryOp.h +++ b/Eigen/src/Core/CwiseNullaryOp.h @@ -752,7 +752,8 @@ struct setIdentity_impl static EIGEN_STRONG_INLINE Derived& run(Derived& m) { m.setZero(); - const 
Index size = (std::min)(m.rows(), m.cols()); + EIGEN_USING_STD_MATH(min) + const Index size = min(m.rows(), m.cols()); for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1); return m; } diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index 92b2eee71..f04facecc 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -290,7 +290,7 @@ MatrixBase::asDiagonal() const template bool MatrixBase::isDiagonal(const RealScalar& prec) const { - using std::abs; + EIGEN_USING_STD_MATH(abs) if(cols() != rows()) return false; RealScalar maxAbsOnDiagonal = static_cast(-1); for(Index j = 0; j < cols(); ++j) diff --git a/Eigen/src/Core/MatrixBase.h b/Eigen/src/Core/MatrixBase.h index d56df8249..f7cf04cde 100644 --- a/Eigen/src/Core/MatrixBase.h +++ b/Eigen/src/Core/MatrixBase.h @@ -399,12 +399,14 @@ template class MatrixBase EIGEN_DEVICE_FUNC inline PlainObject unitOrthogonal(void) const; + EIGEN_DEVICE_FUNC inline Matrix eulerAngles(Index a0, Index a1, Index a2) const; // put this as separate enum value to work around possible GCC 4.3 bug (?) enum { HomogeneousReturnTypeDirection = ColsAtCompileTime==1&&RowsAtCompileTime==1 ? ((internal::traits::Flags&RowMajorBit)==RowMajorBit ? Horizontal : Vertical) : ColsAtCompileTime==1 ? Vertical : Horizontal }; typedef Homogeneous HomogeneousReturnType; + EIGEN_DEVICE_FUNC inline HomogeneousReturnType homogeneous() const; enum { @@ -414,7 +416,7 @@ template class MatrixBase internal::traits::ColsAtCompileTime==1 ? SizeMinusOne : 1, internal::traits::ColsAtCompileTime==1 ? 
1 : SizeMinusOne> ConstStartMinusOne; typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne,Scalar,quotient) HNormalizedReturnType; - + EIGEN_DEVICE_FUNC inline const HNormalizedReturnType hnormalized() const; ////////// Householder module /////////// diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 55b4ac057..00313920c 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -916,8 +916,9 @@ struct conservative_resize_like_impl { // The storage order does not allow us to use reallocation. typename Derived::PlainObject tmp(rows,cols); - const Index common_rows = (std::min)(rows, _this.rows()); - const Index common_cols = (std::min)(cols, _this.cols()); + EIGEN_USING_STD_MATH(min) + const Index common_rows = min(rows, _this.rows()); + const Index common_cols = min(cols, _this.cols()); tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); _this.derived().swap(tmp); } @@ -950,8 +951,9 @@ struct conservative_resize_like_impl { // The storage order does not allow us to use reallocation. 
typename Derived::PlainObject tmp(other); - const Index common_rows = (std::min)(tmp.rows(), _this.rows()); - const Index common_cols = (std::min)(tmp.cols(), _this.cols()); + EIGEN_USING_STD_MATH(min) + const Index common_rows = min(tmp.rows(), _this.rows()); + const Index common_cols = min(tmp.cols(), _this.cols()); tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); _this.derived().swap(tmp); } diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index e9606ec33..e804cf6bb 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -641,11 +641,12 @@ MatrixBase::triangularView() const template bool MatrixBase::isUpperTriangular(const RealScalar& prec) const { - using std::abs; + EIGEN_USING_STD_MATH(abs) RealScalar maxAbsOnUpperPart = static_cast(-1); + EIGEN_USING_STD_MATH(min) for(Index j = 0; j < cols(); ++j) { - Index maxi = (std::min)(j, rows()-1); + Index maxi = min(j, rows()-1); for(Index i = 0; i <= maxi; ++i) { RealScalar absValue = abs(coeff(i,j)); @@ -667,7 +668,8 @@ bool MatrixBase::isUpperTriangular(const RealScalar& prec) const template bool MatrixBase::isLowerTriangular(const RealScalar& prec) const { - using std::abs; + EIGEN_USING_STD_MATH(abs) + EIGEN_USING_STD_MATH(min) RealScalar maxAbsOnLowerPart = static_cast(-1); for(Index j = 0; j < cols(); ++j) for(Index i = j; i < rows(); ++i) @@ -678,7 +680,7 @@ bool MatrixBase::isLowerTriangular(const RealScalar& prec) const RealScalar threshold = maxAbsOnLowerPart * prec; for(Index j = 1; j < cols(); ++j) { - Index maxi = (std::min)(j, rows()-1); + Index maxi = min(j, rows()-1); for(Index i = 0; i < maxi; ++i) if(abs(coeff(i, j)) > threshold) return false; } @@ -891,9 +893,10 @@ struct triangular_assignment_loop EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) { + EIGEN_USING_STD_MATH(min) for(Index j = 0; j < kernel.cols(); ++j) { - Index maxi = (std::min)(j, kernel.rows()); + Index maxi = min(j, 
kernel.rows()); Index i = 0; if (((Mode&Lower) && SetOpposite) || (Mode&Upper)) { diff --git a/Eigen/src/Core/VectorwiseOp.h b/Eigen/src/Core/VectorwiseOp.h index dd382e990..14f403594 100644 --- a/Eigen/src/Core/VectorwiseOp.h +++ b/Eigen/src/Core/VectorwiseOp.h @@ -625,6 +625,7 @@ template class VectorwiseOp /////////// Geometry module /////////// typedef Homogeneous HomogeneousReturnType; + EIGEN_DEVICE_FUNC HomogeneousReturnType homogeneous() const; typedef typename ExpressionType::PlainObject CrossReturnType; @@ -654,6 +655,7 @@ template class VectorwiseOp Direction==Horizontal ? HNormalized_SizeMinusOne : 1> > HNormalizedReturnType; + EIGEN_DEVICE_FUNC const HNormalizedReturnType hnormalized() const; protected: diff --git a/Eigen/src/Core/functors/BinaryFunctors.h b/Eigen/src/Core/functors/BinaryFunctors.h index d82ffed02..96747bac7 100644 --- a/Eigen/src/Core/functors/BinaryFunctors.h +++ b/Eigen/src/Core/functors/BinaryFunctors.h @@ -266,7 +266,7 @@ struct scalar_hypot_op : binary_op_base // typedef typename NumTraits::Real result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar operator() (const Scalar& _x, const Scalar& _y) const { - using std::sqrt; + EIGEN_USING_STD_MATH(sqrt) Scalar p, qp; if(_x>_y) { diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 2009f8e57..2e6a00ffd 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -321,7 +321,7 @@ struct functor_traits > { */ template struct scalar_log10_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_log10_op) - EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { using std::log10; return log10(a); } + EIGEN_DEVICE_FUNC inline const Scalar operator() (const Scalar& a) const { EIGEN_USING_STD_MATH(log10) return log10(a); } template EIGEN_DEVICE_FUNC inline Packet packetOp(const Packet& a) const { return internal::plog10(a); } }; diff --git a/Eigen/src/Geometry/AlignedBox.h 
b/Eigen/src/Geometry/AlignedBox.h index d20d17492..066eae4f9 100644 --- a/Eigen/src/Geometry/AlignedBox.h +++ b/Eigen/src/Geometry/AlignedBox.h @@ -62,57 +62,57 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) /** Default constructor initializing a null box. */ - inline AlignedBox() + EIGEN_DEVICE_FUNC inline AlignedBox() { if (AmbientDimAtCompileTime!=Dynamic) setEmpty(); } /** Constructs a null box with \a _dim the dimension of the ambient space. */ - inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim) + EIGEN_DEVICE_FUNC inline explicit AlignedBox(Index _dim) : m_min(_dim), m_max(_dim) { setEmpty(); } /** Constructs a box with extremities \a _min and \a _max. * \warning If either component of \a _min is larger than the same component of \a _max, the constructed box is empty. */ template - inline AlignedBox(const OtherVectorType1& _min, const OtherVectorType2& _max) : m_min(_min), m_max(_max) {} + EIGEN_DEVICE_FUNC inline AlignedBox(const OtherVectorType1& _min, const OtherVectorType2& _max) : m_min(_min), m_max(_max) {} /** Constructs a box containing a single point \a p. */ template - inline explicit AlignedBox(const MatrixBase& p) : m_min(p), m_max(m_min) + EIGEN_DEVICE_FUNC inline explicit AlignedBox(const MatrixBase& p) : m_min(p), m_max(m_min) { } - ~AlignedBox() {} + EIGEN_DEVICE_FUNC ~AlignedBox() {} /** \returns the dimension in which the box holds */ - inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_min.size() : Index(AmbientDimAtCompileTime); } + EIGEN_DEVICE_FUNC inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? 
m_min.size() : Index(AmbientDimAtCompileTime); } /** \deprecated use isEmpty() */ - inline bool isNull() const { return isEmpty(); } + EIGEN_DEVICE_FUNC inline bool isNull() const { return isEmpty(); } /** \deprecated use setEmpty() */ - inline void setNull() { setEmpty(); } + EIGEN_DEVICE_FUNC inline void setNull() { setEmpty(); } /** \returns true if the box is empty. * \sa setEmpty */ - inline bool isEmpty() const { return (m_min.array() > m_max.array()).any(); } + EIGEN_DEVICE_FUNC inline bool isEmpty() const { return (m_min.array() > m_max.array()).any(); } /** Makes \c *this an empty box. * \sa isEmpty */ - inline void setEmpty() + EIGEN_DEVICE_FUNC inline void setEmpty() { m_min.setConstant( ScalarTraits::highest() ); m_max.setConstant( ScalarTraits::lowest() ); } /** \returns the minimal corner */ - inline const VectorType& (min)() const { return m_min; } + EIGEN_DEVICE_FUNC inline const VectorType& (min)() const { return m_min; } /** \returns a non const reference to the minimal corner */ - inline VectorType& (min)() { return m_min; } + EIGEN_DEVICE_FUNC inline VectorType& (min)() { return m_min; } /** \returns the maximal corner */ - inline const VectorType& (max)() const { return m_max; } + EIGEN_DEVICE_FUNC inline const VectorType& (max)() const { return m_max; } /** \returns a non const reference to the maximal corner */ - inline VectorType& (max)() { return m_max; } + EIGEN_DEVICE_FUNC inline VectorType& (max)() { return m_max; } /** \returns the center of the box */ - inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(VectorTypeSum, RealScalar, quotient) + EIGEN_DEVICE_FUNC inline const EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(VectorTypeSum, RealScalar, quotient) center() const { return (m_min+m_max)/RealScalar(2); } @@ -120,18 +120,18 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) * Note that this function does not get the same * result for integral or floating scalar types: see */ - inline const CwiseBinaryOp< 
internal::scalar_difference_op, const VectorType, const VectorType> sizes() const + EIGEN_DEVICE_FUNC inline const CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> sizes() const { return m_max - m_min; } /** \returns the volume of the bounding box */ - inline Scalar volume() const + EIGEN_DEVICE_FUNC inline Scalar volume() const { return sizes().prod(); } /** \returns an expression for the bounding box diagonal vector * if the length of the diagonal is needed: diagonal().norm() * will provide it. */ - inline CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> diagonal() const + EIGEN_DEVICE_FUNC inline CwiseBinaryOp< internal::scalar_difference_op, const VectorType, const VectorType> diagonal() const { return sizes(); } /** \returns the vertex of the bounding box at the corner defined by @@ -143,7 +143,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) * For 3D bounding boxes, the following names are added: * BottomLeftCeil, BottomRightCeil, TopLeftCeil, TopRightCeil. */ - inline VectorType corner(CornerType corner) const + EIGEN_DEVICE_FUNC inline VectorType corner(CornerType corner) const { EIGEN_STATIC_ASSERT(_AmbientDim <= 3, THIS_METHOD_IS_ONLY_FOR_VECTORS_OF_A_SPECIFIC_SIZE); @@ -161,7 +161,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) /** \returns a random point inside the bounding box sampled with * a uniform distribution */ - inline VectorType sample() const + EIGEN_DEVICE_FUNC inline VectorType sample() const { VectorType r(dim()); for(Index d=0; d - inline bool contains(const MatrixBase& p) const + EIGEN_DEVICE_FUNC inline bool contains(const MatrixBase& p) const { typename internal::nested_eval::type p_n(p.derived()); return (m_min.array()<=p_n.array()).all() && (p_n.array()<=m_max.array()).all(); } /** \returns true if the box \a b is entirely inside the box \c *this. 
*/ - inline bool contains(const AlignedBox& b) const + EIGEN_DEVICE_FUNC inline bool contains(const AlignedBox& b) const { return (m_min.array()<=(b.min)().array()).all() && ((b.max)().array()<=m_max.array()).all(); } /** \returns true if the box \a b is intersecting the box \c *this. * \sa intersection, clamp */ - inline bool intersects(const AlignedBox& b) const + EIGEN_DEVICE_FUNC inline bool intersects(const AlignedBox& b) const { return (m_min.array()<=(b.max)().array()).all() && ((b.min)().array()<=m_max.array()).all(); } /** Extends \c *this such that it contains the point \a p and returns a reference to \c *this. * \sa extend(const AlignedBox&) */ template - inline AlignedBox& extend(const MatrixBase& p) + EIGEN_DEVICE_FUNC inline AlignedBox& extend(const MatrixBase& p) { typename internal::nested_eval::type p_n(p.derived()); m_min = m_min.cwiseMin(p_n); @@ -207,7 +207,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) /** Extends \c *this such that it contains the box \a b and returns a reference to \c *this. * \sa merged, extend(const MatrixBase&) */ - inline AlignedBox& extend(const AlignedBox& b) + EIGEN_DEVICE_FUNC inline AlignedBox& extend(const AlignedBox& b) { m_min = m_min.cwiseMin(b.m_min); m_max = m_max.cwiseMax(b.m_max); @@ -217,7 +217,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) /** Clamps \c *this by the box \a b and returns a reference to \c *this. * \note If the boxes don't intersect, the resulting box is empty. * \sa intersection(), intersects() */ - inline AlignedBox& clamp(const AlignedBox& b) + EIGEN_DEVICE_FUNC inline AlignedBox& clamp(const AlignedBox& b) { m_min = m_min.cwiseMax(b.m_min); m_max = m_max.cwiseMin(b.m_max); @@ -227,18 +227,18 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) /** Returns an AlignedBox that is the intersection of \a b and \c *this * \note If the boxes don't intersect, the resulting box is empty. 
* \sa intersects(), clamp, contains() */ - inline AlignedBox intersection(const AlignedBox& b) const + EIGEN_DEVICE_FUNC inline AlignedBox intersection(const AlignedBox& b) const {return AlignedBox(m_min.cwiseMax(b.m_min), m_max.cwiseMin(b.m_max)); } /** Returns an AlignedBox that is the union of \a b and \c *this. * \note Merging with an empty box may result in a box bigger than \c *this. * \sa extend(const AlignedBox&) */ - inline AlignedBox merged(const AlignedBox& b) const + EIGEN_DEVICE_FUNC inline AlignedBox merged(const AlignedBox& b) const { return AlignedBox(m_min.cwiseMin(b.m_min), m_max.cwiseMax(b.m_max)); } /** Translate \c *this by the vector \a t and returns a reference to \c *this. */ template - inline AlignedBox& translate(const MatrixBase& a_t) + EIGEN_DEVICE_FUNC inline AlignedBox& translate(const MatrixBase& a_t) { const typename internal::nested_eval::type t(a_t.derived()); m_min += t; @@ -251,28 +251,28 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) * \sa exteriorDistance(const MatrixBase&), squaredExteriorDistance(const AlignedBox&) */ template - inline Scalar squaredExteriorDistance(const MatrixBase& p) const; + EIGEN_DEVICE_FUNC inline Scalar squaredExteriorDistance(const MatrixBase& p) const; /** \returns the squared distance between the boxes \a b and \c *this, * and zero if the boxes intersect. * \sa exteriorDistance(const AlignedBox&), squaredExteriorDistance(const MatrixBase&) */ - inline Scalar squaredExteriorDistance(const AlignedBox& b) const; + EIGEN_DEVICE_FUNC inline Scalar squaredExteriorDistance(const AlignedBox& b) const; /** \returns the distance between the point \a p and the box \c *this, * and zero if \a p is inside the box. 
* \sa squaredExteriorDistance(const MatrixBase&), exteriorDistance(const AlignedBox&) */ template - inline NonInteger exteriorDistance(const MatrixBase& p) const - { using std::sqrt; return sqrt(NonInteger(squaredExteriorDistance(p))); } + EIGEN_DEVICE_FUNC inline NonInteger exteriorDistance(const MatrixBase& p) const + { EIGEN_USING_STD_MATH(sqrt) return sqrt(NonInteger(squaredExteriorDistance(p))); } /** \returns the distance between the boxes \a b and \c *this, * and zero if the boxes intersect. * \sa squaredExteriorDistance(const AlignedBox&), exteriorDistance(const MatrixBase&) */ - inline NonInteger exteriorDistance(const AlignedBox& b) const - { using std::sqrt; return sqrt(NonInteger(squaredExteriorDistance(b))); } + EIGEN_DEVICE_FUNC inline NonInteger exteriorDistance(const AlignedBox& b) const + { EIGEN_USING_STD_MATH(sqrt) return sqrt(NonInteger(squaredExteriorDistance(b))); } /** \returns \c *this with scalar type casted to \a NewScalarType * @@ -280,7 +280,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) * then this function smartly returns a const reference to \c *this. */ template - inline typename internal::cast_return_type >::type cast() const { return typename internal::cast_return_type - inline explicit AlignedBox(const AlignedBox& other) + EIGEN_DEVICE_FUNC inline explicit AlignedBox(const AlignedBox& other) { m_min = (other.min)().template cast(); m_max = (other.max)().template cast(); @@ -299,7 +299,7 @@ EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(_Scalar,_AmbientDim) * determined by \a prec. 
* * \sa MatrixBase::isApprox() */ - bool isApprox(const AlignedBox& other, const RealScalar& prec = ScalarTraits::dummy_precision()) const + EIGEN_DEVICE_FUNC bool isApprox(const AlignedBox& other, const RealScalar& prec = ScalarTraits::dummy_precision()) const { return m_min.isApprox(other.m_min, prec) && m_max.isApprox(other.m_max, prec); } protected: @@ -311,7 +311,7 @@ protected: template template -inline Scalar AlignedBox::squaredExteriorDistance(const MatrixBase& a_p) const +EIGEN_DEVICE_FUNC inline Scalar AlignedBox::squaredExteriorDistance(const MatrixBase& a_p) const { typename internal::nested_eval::type p(a_p.derived()); Scalar dist2(0); @@ -333,7 +333,7 @@ inline Scalar AlignedBox::squaredExteriorDistance(const Matri } template -inline Scalar AlignedBox::squaredExteriorDistance(const AlignedBox& b) const +EIGEN_DEVICE_FUNC inline Scalar AlignedBox::squaredExteriorDistance(const AlignedBox& b) const { Scalar dist2(0); Scalar aux; diff --git a/Eigen/src/Geometry/AngleAxis.h b/Eigen/src/Geometry/AngleAxis.h index 571062d00..0af3c1b08 100644 --- a/Eigen/src/Geometry/AngleAxis.h +++ b/Eigen/src/Geometry/AngleAxis.h @@ -69,59 +69,61 @@ protected: public: /** Default constructor without initialization. */ - AngleAxis() {} + EIGEN_DEVICE_FUNC AngleAxis() {} /** Constructs and initialize the angle-axis rotation from an \a angle in radian * and an \a axis which \b must \b be \b normalized. * * \warning If the \a axis vector is not normalized, then the angle-axis object * represents an invalid rotation. */ template + EIGEN_DEVICE_FUNC inline AngleAxis(const Scalar& angle, const MatrixBase& axis) : m_axis(axis), m_angle(angle) {} /** Constructs and initialize the angle-axis rotation from a quaternion \a q. * This function implicitly normalizes the quaternion \a q. 
*/ - template inline explicit AngleAxis(const QuaternionBase& q) { *this = q; } + template + EIGEN_DEVICE_FUNC inline explicit AngleAxis(const QuaternionBase& q) { *this = q; } /** Constructs and initialize the angle-axis rotation from a 3x3 rotation matrix. */ template - inline explicit AngleAxis(const MatrixBase& m) { *this = m; } + EIGEN_DEVICE_FUNC inline explicit AngleAxis(const MatrixBase& m) { *this = m; } /** \returns the value of the rotation angle in radian */ - Scalar angle() const { return m_angle; } + EIGEN_DEVICE_FUNC Scalar angle() const { return m_angle; } /** \returns a read-write reference to the stored angle in radian */ - Scalar& angle() { return m_angle; } + EIGEN_DEVICE_FUNC Scalar& angle() { return m_angle; } /** \returns the rotation axis */ - const Vector3& axis() const { return m_axis; } + EIGEN_DEVICE_FUNC const Vector3& axis() const { return m_axis; } /** \returns a read-write reference to the stored rotation axis. * * \warning The rotation axis must remain a \b unit vector. 
*/ - Vector3& axis() { return m_axis; } + EIGEN_DEVICE_FUNC Vector3& axis() { return m_axis; } /** Concatenates two rotations */ - inline QuaternionType operator* (const AngleAxis& other) const + EIGEN_DEVICE_FUNC inline QuaternionType operator* (const AngleAxis& other) const { return QuaternionType(*this) * QuaternionType(other); } /** Concatenates two rotations */ - inline QuaternionType operator* (const QuaternionType& other) const + EIGEN_DEVICE_FUNC inline QuaternionType operator* (const QuaternionType& other) const { return QuaternionType(*this) * other; } /** Concatenates two rotations */ - friend inline QuaternionType operator* (const QuaternionType& a, const AngleAxis& b) + friend EIGEN_DEVICE_FUNC inline QuaternionType operator* (const QuaternionType& a, const AngleAxis& b) { return a * QuaternionType(b); } /** \returns the inverse rotation, i.e., an angle-axis with opposite rotation angle */ - AngleAxis inverse() const + EIGEN_DEVICE_FUNC AngleAxis inverse() const { return AngleAxis(-m_angle, m_axis); } template - AngleAxis& operator=(const QuaternionBase& q); + EIGEN_DEVICE_FUNC AngleAxis& operator=(const QuaternionBase& q); template - AngleAxis& operator=(const MatrixBase& m); + EIGEN_DEVICE_FUNC AngleAxis& operator=(const MatrixBase& m); template - AngleAxis& fromRotationMatrix(const MatrixBase& m); - Matrix3 toRotationMatrix(void) const; + EIGEN_DEVICE_FUNC AngleAxis& fromRotationMatrix(const MatrixBase& m); + EIGEN_DEVICE_FUNC Matrix3 toRotationMatrix(void) const; /** \returns \c *this with scalar type casted to \a NewScalarType * @@ -129,24 +131,24 @@ public: * then this function smartly returns a const reference to \c *this. 
*/ template - inline typename internal::cast_return_type >::type cast() const + EIGEN_DEVICE_FUNC inline typename internal::cast_return_type >::type cast() const { return typename internal::cast_return_type >::type(*this); } /** Copy constructor with scalar type conversion */ template - inline explicit AngleAxis(const AngleAxis& other) + EIGEN_DEVICE_FUNC inline explicit AngleAxis(const AngleAxis& other) { m_axis = other.axis().template cast(); m_angle = Scalar(other.angle()); } - static inline const AngleAxis Identity() { return AngleAxis(Scalar(0), Vector3::UnitX()); } + EIGEN_DEVICE_FUNC static inline const AngleAxis Identity() { return AngleAxis(Scalar(0), Vector3::UnitX()); } /** \returns \c true if \c *this is approximately equal to \a other, within the precision * determined by \a prec. * * \sa MatrixBase::isApprox() */ - bool isApprox(const AngleAxis& other, const typename NumTraits::Real& prec = NumTraits::dummy_precision()) const + EIGEN_DEVICE_FUNC bool isApprox(const AngleAxis& other, const typename NumTraits::Real& prec = NumTraits::dummy_precision()) const { return m_axis.isApprox(other.m_axis, prec) && internal::isApprox(m_angle,other.m_angle, prec); } }; @@ -165,10 +167,10 @@ typedef AngleAxis AngleAxisd; */ template template -AngleAxis& AngleAxis::operator=(const QuaternionBase& q) +EIGEN_DEVICE_FUNC AngleAxis& AngleAxis::operator=(const QuaternionBase& q) { - using std::atan2; - using std::abs; + EIGEN_USING_STD_MATH(atan2) + EIGEN_USING_STD_MATH(abs) Scalar n = q.vec().norm(); if(n::epsilon()) n = q.vec().stableNorm(); @@ -192,7 +194,7 @@ AngleAxis& AngleAxis::operator=(const QuaternionBase template -AngleAxis& AngleAxis::operator=(const MatrixBase& mat) +EIGEN_DEVICE_FUNC AngleAxis& AngleAxis::operator=(const MatrixBase& mat) { // Since a direct conversion would not be really faster, // let's use the robust Quaternion implementation: @@ -204,7 +206,7 @@ AngleAxis& AngleAxis::operator=(const MatrixBase& mat) **/ template template -AngleAxis& 
AngleAxis::fromRotationMatrix(const MatrixBase& mat) +EIGEN_DEVICE_FUNC AngleAxis& AngleAxis::fromRotationMatrix(const MatrixBase& mat) { return *this = QuaternionType(mat); } @@ -213,10 +215,10 @@ AngleAxis& AngleAxis::fromRotationMatrix(const MatrixBase typename AngleAxis::Matrix3 -AngleAxis::toRotationMatrix(void) const +EIGEN_DEVICE_FUNC AngleAxis::toRotationMatrix(void) const { - using std::sin; - using std::cos; + EIGEN_USING_STD_MATH(sin) + EIGEN_USING_STD_MATH(cos) Matrix3 res; Vector3 sin_axis = sin(m_angle) * m_axis; Scalar c = cos(m_angle); diff --git a/Eigen/src/Geometry/EulerAngles.h b/Eigen/src/Geometry/EulerAngles.h index 4865e58aa..c633268af 100644 --- a/Eigen/src/Geometry/EulerAngles.h +++ b/Eigen/src/Geometry/EulerAngles.h @@ -33,12 +33,12 @@ namespace Eigen { * \sa class AngleAxis */ template -inline Matrix::Scalar,3,1> +EIGEN_DEVICE_FUNC inline Matrix::Scalar,3,1> MatrixBase::eulerAngles(Index a0, Index a1, Index a2) const { - using std::atan2; - using std::sin; - using std::cos; + EIGEN_USING_STD_MATH(atan2) + EIGEN_USING_STD_MATH(sin) + EIGEN_USING_STD_MATH(cos) /* Implemented from Graphics Gems IV */ EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Derived,3,3) diff --git a/Eigen/src/Geometry/Homogeneous.h b/Eigen/src/Geometry/Homogeneous.h index a23068c8d..804e5da73 100644 --- a/Eigen/src/Geometry/Homogeneous.h +++ b/Eigen/src/Geometry/Homogeneous.h @@ -68,17 +68,17 @@ template class Homogeneous typedef MatrixBase Base; EIGEN_DENSE_PUBLIC_INTERFACE(Homogeneous) - explicit inline Homogeneous(const MatrixType& matrix) + EIGEN_DEVICE_FUNC explicit inline Homogeneous(const MatrixType& matrix) : m_matrix(matrix) {} - inline Index rows() const { return m_matrix.rows() + (int(Direction)==Vertical ? 1 : 0); } - inline Index cols() const { return m_matrix.cols() + (int(Direction)==Horizontal ? 1 : 0); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_matrix.rows() + (int(Direction)==Vertical ? 
1 : 0); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_matrix.cols() + (int(Direction)==Horizontal ? 1 : 0); } - const NestedExpression& nestedExpression() const { return m_matrix; } + EIGEN_DEVICE_FUNC const NestedExpression& nestedExpression() const { return m_matrix; } template - inline const Product + EIGEN_DEVICE_FUNC inline const Product operator* (const MatrixBase& rhs) const { eigen_assert(int(Direction)==Horizontal); @@ -86,7 +86,7 @@ template class Homogeneous } template friend - inline const Product + EIGEN_DEVICE_FUNC inline const Product operator* (const MatrixBase& lhs, const Homogeneous& rhs) { eigen_assert(int(Direction)==Vertical); @@ -94,7 +94,7 @@ template class Homogeneous } template friend - inline const Product, Homogeneous > + EIGEN_DEVICE_FUNC inline const Product, Homogeneous > operator* (const Transform& lhs, const Homogeneous& rhs) { eigen_assert(int(Direction)==Vertical); @@ -102,7 +102,7 @@ template class Homogeneous } template - EIGEN_STRONG_INLINE typename internal::result_of::type + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::result_of::type redux(const Func& func) const { return func(m_matrix.redux(func), Scalar(1)); @@ -124,7 +124,7 @@ template class Homogeneous * \sa VectorwiseOp::homogeneous(), class Homogeneous */ template -inline typename MatrixBase::HomogeneousReturnType +EIGEN_DEVICE_FUNC inline typename MatrixBase::HomogeneousReturnType MatrixBase::homogeneous() const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); @@ -140,7 +140,7 @@ MatrixBase::homogeneous() const * * \sa MatrixBase::homogeneous(), class Homogeneous */ template -inline Homogeneous +EIGEN_DEVICE_FUNC inline Homogeneous VectorwiseOp::homogeneous() const { return HomogeneousReturnType(_expression()); @@ -155,7 +155,7 @@ VectorwiseOp::homogeneous() const * * \sa VectorwiseOp::hnormalized() */ template -inline const typename MatrixBase::HNormalizedReturnType +EIGEN_DEVICE_FUNC inline const typename MatrixBase::HNormalizedReturnType 
MatrixBase::hnormalized() const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); @@ -173,7 +173,7 @@ MatrixBase::hnormalized() const * * \sa MatrixBase::hnormalized() */ template -inline const typename VectorwiseOp::HNormalizedReturnType +EIGEN_DEVICE_FUNC inline const typename VectorwiseOp::HNormalizedReturnType VectorwiseOp::hnormalized() const { return HNormalized_Block(_expression(),0,0, @@ -197,7 +197,7 @@ template struct take_matrix_for_product { typedef MatrixOrTransformType type; - static const type& run(const type &x) { return x; } + EIGEN_DEVICE_FUNC static const type& run(const type &x) { return x; } }; template @@ -205,7 +205,7 @@ struct take_matrix_for_product > { typedef Transform TransformType; typedef typename internal::add_const::type type; - static type run (const TransformType& x) { return x.affine(); } + EIGEN_DEVICE_FUNC static type run (const TransformType& x) { return x.affine(); } }; template @@ -213,7 +213,7 @@ struct take_matrix_for_product > { typedef Transform TransformType; typedef typename TransformType::MatrixType type; - static const type& run (const TransformType& x) { return x.matrix(); } + EIGEN_DEVICE_FUNC static const type& run (const TransformType& x) { return x.matrix(); } }; template @@ -238,15 +238,15 @@ struct homogeneous_left_product_impl,Lhs> typedef typename traits::LhsMatrixType LhsMatrixType; typedef typename remove_all::type LhsMatrixTypeCleaned; typedef typename remove_all::type LhsMatrixTypeNested; - homogeneous_left_product_impl(const Lhs& lhs, const MatrixType& rhs) + EIGEN_DEVICE_FUNC homogeneous_left_product_impl(const Lhs& lhs, const MatrixType& rhs) : m_lhs(take_matrix_for_product::run(lhs)), m_rhs(rhs) {} - inline Index rows() const { return m_lhs.rows(); } - inline Index cols() const { return m_rhs.cols(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); } - template void evalTo(Dest& dst) const + template 
EIGEN_DEVICE_FUNC void evalTo(Dest& dst) const { // FIXME investigate how to allow lazy evaluation of this product when possible dst = Block,Rhs> : public ReturnByValue,Rhs> > { typedef typename remove_all::type RhsNested; - homogeneous_right_product_impl(const MatrixType& lhs, const Rhs& rhs) + EIGEN_DEVICE_FUNC homogeneous_right_product_impl(const MatrixType& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) {} - inline Index rows() const { return m_lhs.rows(); } - inline Index cols() const { return m_rhs.cols(); } + EIGEN_DEVICE_FUNC inline Index rows() const { return m_lhs.rows(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_rhs.cols(); } - template void evalTo(Dest& dst) const + template EIGEN_DEVICE_FUNC void evalTo(Dest& dst) const { // FIXME investigate how to allow lazy evaluation of this product when possible dst = m_lhs * Block, IndexBased> typedef typename XprType::PlainObject PlainObject; typedef evaluator Base; - explicit unary_evaluator(const XprType& op) + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : Base(), m_temp(op) { ::new (static_cast(this)) Base(m_temp); @@ -332,7 +332,7 @@ template< typename DstXprType, typename ArgType, typename Scalar> struct Assignment, internal::assign_op, Dense2Dense> { typedef Homogeneous SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst.template topRows(src.nestedExpression().rows()) = src.nestedExpression(); dst.row(dst.rows()-1).setOnes(); @@ -344,7 +344,7 @@ template< typename DstXprType, typename ArgType, typename Scalar> struct Assignment, internal::assign_op, Dense2Dense> { typedef Homogeneous SrcXprType; - static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) + EIGEN_DEVICE_FUNC static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { dst.template 
leftCols(src.nestedExpression().cols()) = src.nestedExpression(); dst.col(dst.cols()-1).setOnes(); @@ -355,7 +355,7 @@ template struct generic_product_impl, Rhs, HomogeneousShape, DenseShape, ProductTag> { template - static void evalTo(Dest& dst, const Homogeneous& lhs, const Rhs& rhs) + EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const Homogeneous& lhs, const Rhs& rhs) { homogeneous_right_product_impl, Rhs>(lhs.nestedExpression(), rhs).evalTo(dst); } @@ -396,7 +396,7 @@ template struct generic_product_impl, DenseShape, HomogeneousShape, ProductTag> { template - static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous& rhs) + EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const Lhs& lhs, const Homogeneous& rhs) { homogeneous_left_product_impl, Lhs>(lhs, rhs.nestedExpression()).evalTo(dst); } @@ -450,7 +450,7 @@ struct generic_product_impl, Homogeneous TransformType; template - static void evalTo(Dest& dst, const TransformType& lhs, const Homogeneous& rhs) + EIGEN_DEVICE_FUNC static void evalTo(Dest& dst, const TransformType& lhs, const Homogeneous& rhs) { homogeneous_left_product_impl, TransformType>(lhs, rhs.nestedExpression()).evalTo(dst); } diff --git a/Eigen/src/Geometry/Hyperplane.h b/Eigen/src/Geometry/Hyperplane.h index cc89639b6..d66194287 100644 --- a/Eigen/src/Geometry/Hyperplane.h +++ b/Eigen/src/Geometry/Hyperplane.h @@ -50,21 +50,21 @@ public: typedef const Block ConstNormalReturnType; /** Default constructor without initialization */ - inline Hyperplane() {} + EIGEN_DEVICE_FUNC inline Hyperplane() {} template - Hyperplane(const Hyperplane& other) + EIGEN_DEVICE_FUNC Hyperplane(const Hyperplane& other) : m_coeffs(other.coeffs()) {} /** Constructs a dynamic-size hyperplane with \a _dim the dimension * of the ambient space */ - inline explicit Hyperplane(Index _dim) : m_coeffs(_dim+1) {} + EIGEN_DEVICE_FUNC inline explicit Hyperplane(Index _dim) : m_coeffs(_dim+1) {} /** Construct a plane from its normal \a n and a point \a e onto the plane. 
* \warning the vector normal is assumed to be normalized. */ - inline Hyperplane(const VectorType& n, const VectorType& e) + EIGEN_DEVICE_FUNC inline Hyperplane(const VectorType& n, const VectorType& e) : m_coeffs(n.size()+1) { normal() = n; @@ -75,7 +75,7 @@ public: * such that the algebraic equation of the plane is \f$ n \cdot x + d = 0 \f$. * \warning the vector normal is assumed to be normalized. */ - inline Hyperplane(const VectorType& n, const Scalar& d) + EIGEN_DEVICE_FUNC inline Hyperplane(const VectorType& n, const Scalar& d) : m_coeffs(n.size()+1) { normal() = n; @@ -85,7 +85,7 @@ public: /** Constructs a hyperplane passing through the two points. If the dimension of the ambient space * is greater than 2, then there isn't uniqueness, so an arbitrary choice is made. */ - static inline Hyperplane Through(const VectorType& p0, const VectorType& p1) + EIGEN_DEVICE_FUNC static inline Hyperplane Through(const VectorType& p0, const VectorType& p1) { Hyperplane result(p0.size()); result.normal() = (p1 - p0).unitOrthogonal(); @@ -96,7 +96,7 @@ public: /** Constructs a hyperplane passing through the three points. The dimension of the ambient space * is required to be exactly 3. */ - static inline Hyperplane Through(const VectorType& p0, const VectorType& p1, const VectorType& p2) + EIGEN_DEVICE_FUNC static inline Hyperplane Through(const VectorType& p0, const VectorType& p1, const VectorType& p2) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 3) Hyperplane result(p0.size()); @@ -120,19 +120,19 @@ public: * so an arbitrary choice is made. */ // FIXME to be consitent with the rest this could be implemented as a static Through function ?? 
- explicit Hyperplane(const ParametrizedLine& parametrized) + EIGEN_DEVICE_FUNC explicit Hyperplane(const ParametrizedLine& parametrized) { normal() = parametrized.direction().unitOrthogonal(); offset() = -parametrized.origin().dot(normal()); } - ~Hyperplane() {} + EIGEN_DEVICE_FUNC ~Hyperplane() {} /** \returns the dimension in which the plane holds */ - inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_coeffs.size()-1 : Index(AmbientDimAtCompileTime); } + EIGEN_DEVICE_FUNC inline Index dim() const { return AmbientDimAtCompileTime==Dynamic ? m_coeffs.size()-1 : Index(AmbientDimAtCompileTime); } /** normalizes \c *this */ - void normalize(void) + EIGEN_DEVICE_FUNC void normalize(void) { m_coeffs /= normal().norm(); } @@ -140,45 +140,45 @@ public: /** \returns the signed distance between the plane \c *this and a point \a p. * \sa absDistance() */ - inline Scalar signedDistance(const VectorType& p) const { return normal().dot(p) + offset(); } + EIGEN_DEVICE_FUNC inline Scalar signedDistance(const VectorType& p) const { return normal().dot(p) + offset(); } /** \returns the absolute distance between the plane \c *this and a point \a p. * \sa signedDistance() */ - inline Scalar absDistance(const VectorType& p) const { using std::abs; return abs(signedDistance(p)); } + EIGEN_DEVICE_FUNC inline Scalar absDistance(const VectorType& p) const { EIGEN_USING_STD_MATH(abs) return abs(signedDistance(p)); } /** \returns the projection of a point \a p onto the plane \c *this. */ - inline VectorType projection(const VectorType& p) const { return p - signedDistance(p) * normal(); } + EIGEN_DEVICE_FUNC inline VectorType projection(const VectorType& p) const { return p - signedDistance(p) * normal(); } /** \returns a constant reference to the unit normal vector of the plane, which corresponds * to the linear part of the implicit equation. 
*/ - inline ConstNormalReturnType normal() const { return ConstNormalReturnType(m_coeffs,0,0,dim(),1); } + EIGEN_DEVICE_FUNC inline ConstNormalReturnType normal() const { return ConstNormalReturnType(m_coeffs,0,0,dim(),1); } /** \returns a non-constant reference to the unit normal vector of the plane, which corresponds * to the linear part of the implicit equation. */ - inline NormalReturnType normal() { return NormalReturnType(m_coeffs,0,0,dim(),1); } + EIGEN_DEVICE_FUNC inline NormalReturnType normal() { return NormalReturnType(m_coeffs,0,0,dim(),1); } /** \returns the distance to the origin, which is also the "constant term" of the implicit equation * \warning the vector normal is assumed to be normalized. */ - inline const Scalar& offset() const { return m_coeffs.coeff(dim()); } + EIGEN_DEVICE_FUNC inline const Scalar& offset() const { return m_coeffs.coeff(dim()); } /** \returns a non-constant reference to the distance to the origin, which is also the constant part * of the implicit equation */ - inline Scalar& offset() { return m_coeffs(dim()); } + EIGEN_DEVICE_FUNC inline Scalar& offset() { return m_coeffs(dim()); } /** \returns a constant reference to the coefficients c_i of the plane equation: * \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$ */ - inline const Coefficients& coeffs() const { return m_coeffs; } + EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs; } /** \returns a non-constant reference to the coefficients c_i of the plane equation: * \f$ c_0*x_0 + ... + c_{d-1}*x_{d-1} + c_d = 0 \f$ */ - inline Coefficients& coeffs() { return m_coeffs; } + EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs; } /** \returns the intersection of *this with \a other. * @@ -186,9 +186,9 @@ public: * * \note If \a other is approximately parallel to *this, this method will return any point on *this. 
*/ - VectorType intersection(const Hyperplane& other) const + EIGEN_DEVICE_FUNC VectorType intersection(const Hyperplane& other) const { - using std::abs; + EIGEN_USING_STD_MATH(abs) EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2) Scalar det = coeffs().coeff(0) * other.coeffs().coeff(1) - coeffs().coeff(1) * other.coeffs().coeff(0); // since the line equations ax+by=c are normalized with a^2+b^2=1, the following tests @@ -215,7 +215,7 @@ public: * or a more generic #Affine transformation. The default is #Affine. */ template - inline Hyperplane& transform(const MatrixBase& mat, TransformTraits traits = Affine) + EIGEN_DEVICE_FUNC inline Hyperplane& transform(const MatrixBase& mat, TransformTraits traits = Affine) { if (traits==Affine) normal() = mat.inverse().transpose() * normal(); @@ -236,7 +236,7 @@ public: * Other kind of transformations are not supported. */ template - inline Hyperplane& transform(const Transform& t, + EIGEN_DEVICE_FUNC inline Hyperplane& transform(const Transform& t, TransformTraits traits = Affine) { transform(t.linear(), traits); @@ -250,7 +250,7 @@ public: * then this function smartly returns a const reference to \c *this. 
*/ template - inline typename internal::cast_return_type >::type cast() const { return typename internal::cast_return_type - inline explicit Hyperplane(const Hyperplane& other) + EIGEN_DEVICE_FUNC inline explicit Hyperplane(const Hyperplane& other) { m_coeffs = other.coeffs().template cast(); } /** \returns \c true if \c *this is approximately equal to \a other, within the precision @@ -267,7 +267,7 @@ public: * * \sa MatrixBase::isApprox() */ template - bool isApprox(const Hyperplane& other, const typename NumTraits::Real& prec = NumTraits::dummy_precision()) const + EIGEN_DEVICE_FUNC bool isApprox(const Hyperplane& other, const typename NumTraits::Real& prec = NumTraits::dummy_precision()) const { return m_coeffs.isApprox(other.m_coeffs, prec); } protected: diff --git a/Eigen/src/Geometry/OrthoMethods.h b/Eigen/src/Geometry/OrthoMethods.h index c3648f51f..a035e6310 100644 --- a/Eigen/src/Geometry/OrthoMethods.h +++ b/Eigen/src/Geometry/OrthoMethods.h @@ -27,7 +27,7 @@ namespace Eigen { template template #ifndef EIGEN_PARSED_BY_DOXYGEN -inline typename MatrixBase::template cross_product_return_type::type +EIGEN_DEVICE_FUNC inline typename MatrixBase::template cross_product_return_type::type #else inline typename MatrixBase::PlainObject #endif @@ -53,7 +53,7 @@ template< int Arch,typename VectorLhs,typename VectorRhs, typename Scalar = typename VectorLhs::Scalar, bool Vectorizable = bool((VectorLhs::Flags&VectorRhs::Flags)&PacketAccessBit)> struct cross3_impl { - static inline typename internal::plain_matrix_type::type + EIGEN_DEVICE_FUNC static inline typename internal::plain_matrix_type::type run(const VectorLhs& lhs, const VectorRhs& rhs) { return typename internal::plain_matrix_type::type( @@ -78,7 +78,7 @@ struct cross3_impl { */ template template -inline typename MatrixBase::PlainObject +EIGEN_DEVICE_FUNC inline typename MatrixBase::PlainObject MatrixBase::cross3(const MatrixBase& other) const { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Derived,4) @@ -105,6 
+105,7 @@ MatrixBase::cross3(const MatrixBase& other) const * \sa MatrixBase::cross() */ template template +EIGEN_DEVICE_FUNC const typename VectorwiseOp::CrossReturnType VectorwiseOp::cross(const MatrixBase& other) const { @@ -221,7 +222,7 @@ struct unitOrthogonal_selector * \sa cross() */ template -typename MatrixBase::PlainObject +EIGEN_DEVICE_FUNC typename MatrixBase::PlainObject MatrixBase::unitOrthogonal() const { EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) diff --git a/Eigen/src/Geometry/ParametrizedLine.h b/Eigen/src/Geometry/ParametrizedLine.h index c43dce773..1e985d8cd 100644 --- a/Eigen/src/Geometry/ParametrizedLine.h +++ b/Eigen/src/Geometry/ParametrizedLine.h @@ -41,45 +41,45 @@ public: typedef Matrix VectorType; /** Default constructor without initialization */ - inline ParametrizedLine() {} + EIGEN_DEVICE_FUNC inline ParametrizedLine() {} template - ParametrizedLine(const ParametrizedLine& other) + EIGEN_DEVICE_FUNC ParametrizedLine(const ParametrizedLine& other) : m_origin(other.origin()), m_direction(other.direction()) {} /** Constructs a dynamic-size line with \a _dim the dimension * of the ambient space */ - inline explicit ParametrizedLine(Index _dim) : m_origin(_dim), m_direction(_dim) {} + EIGEN_DEVICE_FUNC inline explicit ParametrizedLine(Index _dim) : m_origin(_dim), m_direction(_dim) {} /** Initializes a parametrized line of direction \a direction and origin \a origin. * \warning the vector direction is assumed to be normalized. */ - ParametrizedLine(const VectorType& origin, const VectorType& direction) + EIGEN_DEVICE_FUNC ParametrizedLine(const VectorType& origin, const VectorType& direction) : m_origin(origin), m_direction(direction) {} template - explicit ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane); + EIGEN_DEVICE_FUNC explicit ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane); /** Constructs a parametrized line going from \a p0 to \a p1. 
*/ - static inline ParametrizedLine Through(const VectorType& p0, const VectorType& p1) + EIGEN_DEVICE_FUNC static inline ParametrizedLine Through(const VectorType& p0, const VectorType& p1) { return ParametrizedLine(p0, (p1-p0).normalized()); } - ~ParametrizedLine() {} + EIGEN_DEVICE_FUNC ~ParametrizedLine() {} /** \returns the dimension in which the line holds */ - inline Index dim() const { return m_direction.size(); } + EIGEN_DEVICE_FUNC inline Index dim() const { return m_direction.size(); } - const VectorType& origin() const { return m_origin; } - VectorType& origin() { return m_origin; } + EIGEN_DEVICE_FUNC const VectorType& origin() const { return m_origin; } + EIGEN_DEVICE_FUNC VectorType& origin() { return m_origin; } - const VectorType& direction() const { return m_direction; } - VectorType& direction() { return m_direction; } + EIGEN_DEVICE_FUNC const VectorType& direction() const { return m_direction; } + EIGEN_DEVICE_FUNC VectorType& direction() { return m_direction; } /** \returns the squared distance of a point \a p to its projection onto the line \c *this. * \sa distance() */ - RealScalar squaredDistance(const VectorType& p) const + EIGEN_DEVICE_FUNC RealScalar squaredDistance(const VectorType& p) const { VectorType diff = p - origin(); return (diff - direction().dot(diff) * direction()).squaredNorm(); @@ -87,22 +87,22 @@ public: /** \returns the distance of a point \a p to its projection onto the line \c *this. * \sa squaredDistance() */ - RealScalar distance(const VectorType& p) const { using std::sqrt; return sqrt(squaredDistance(p)); } + EIGEN_DEVICE_FUNC RealScalar distance(const VectorType& p) const { EIGEN_USING_STD_MATH(sqrt) return sqrt(squaredDistance(p)); } /** \returns the projection of a point \a p onto the line \c *this. 
*/ - VectorType projection(const VectorType& p) const + EIGEN_DEVICE_FUNC VectorType projection(const VectorType& p) const { return origin() + direction().dot(p-origin()) * direction(); } - VectorType pointAt(const Scalar& t) const; + EIGEN_DEVICE_FUNC VectorType pointAt(const Scalar& t) const; template - Scalar intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const; + EIGEN_DEVICE_FUNC Scalar intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const; template - Scalar intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const; + EIGEN_DEVICE_FUNC Scalar intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const; template - VectorType intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const; + EIGEN_DEVICE_FUNC VectorType intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const; /** \returns \c *this with scalar type casted to \a NewScalarType * @@ -110,7 +110,7 @@ public: * then this function smartly returns a const reference to \c *this. */ template - inline typename internal::cast_return_type >::type cast() const { return typename internal::cast_return_type - inline explicit ParametrizedLine(const ParametrizedLine& other) + EIGEN_DEVICE_FUNC inline explicit ParametrizedLine(const ParametrizedLine& other) { m_origin = other.origin().template cast(); m_direction = other.direction().template cast(); @@ -129,7 +129,7 @@ public: * determined by \a prec. 
* * \sa MatrixBase::isApprox() */ - bool isApprox(const ParametrizedLine& other, const typename NumTraits::Real& prec = NumTraits::dummy_precision()) const + EIGEN_DEVICE_FUNC bool isApprox(const ParametrizedLine& other, const typename NumTraits::Real& prec = NumTraits::dummy_precision()) const { return m_origin.isApprox(other.m_origin, prec) && m_direction.isApprox(other.m_direction, prec); } protected: @@ -143,7 +143,7 @@ protected: */ template template -inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim,OtherOptions>& hyperplane) +EIGEN_DEVICE_FUNC inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const Hyperplane<_Scalar, _AmbientDim,OtherOptions>& hyperplane) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2) direction() = hyperplane.normal().unitOrthogonal(); @@ -153,7 +153,7 @@ inline ParametrizedLine<_Scalar, _AmbientDim,_Options>::ParametrizedLine(const H /** \returns the point at \a t along this line */ template -inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType +EIGEN_DEVICE_FUNC inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType ParametrizedLine<_Scalar, _AmbientDim,_Options>::pointAt(const _Scalar& t) const { return origin() + (direction()*t); @@ -163,7 +163,7 @@ ParametrizedLine<_Scalar, _AmbientDim,_Options>::pointAt(const _Scalar& t) const */ template template -inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const +EIGEN_DEVICE_FUNC inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionParameter(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const { return -(hyperplane.offset()+hyperplane.normal().dot(origin())) / hyperplane.normal().dot(direction()); @@ -175,7 +175,7 @@ inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionPara */ template 
template -inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const +EIGEN_DEVICE_FUNC inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const { return intersectionParameter(hyperplane); } @@ -184,7 +184,7 @@ inline _Scalar ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersection(con */ template template -inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType +EIGEN_DEVICE_FUNC inline typename ParametrizedLine<_Scalar, _AmbientDim,_Options>::VectorType ParametrizedLine<_Scalar, _AmbientDim,_Options>::intersectionPoint(const Hyperplane<_Scalar, _AmbientDim, OtherOptions>& hyperplane) const { return pointAt(intersectionParameter(hyperplane)); diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index c4a0eabb5..932f149e3 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h @@ -58,37 +58,37 @@ class QuaternionBase : public RotationBase /** \returns the \c x coefficient */ - inline Scalar x() const { return this->derived().coeffs().coeff(0); } + EIGEN_DEVICE_FUNC inline Scalar x() const { return this->derived().coeffs().coeff(0); } /** \returns the \c y coefficient */ - inline Scalar y() const { return this->derived().coeffs().coeff(1); } + EIGEN_DEVICE_FUNC inline Scalar y() const { return this->derived().coeffs().coeff(1); } /** \returns the \c z coefficient */ - inline Scalar z() const { return this->derived().coeffs().coeff(2); } + EIGEN_DEVICE_FUNC inline Scalar z() const { return this->derived().coeffs().coeff(2); } /** \returns the \c w coefficient */ - inline Scalar w() const { return this->derived().coeffs().coeff(3); } + EIGEN_DEVICE_FUNC inline Scalar w() const { return this->derived().coeffs().coeff(3); } /** \returns a reference to the \c x coefficient */ - inline Scalar& x() { return 
this->derived().coeffs().coeffRef(0); } + EIGEN_DEVICE_FUNC inline Scalar& x() { return this->derived().coeffs().coeffRef(0); } /** \returns a reference to the \c y coefficient */ - inline Scalar& y() { return this->derived().coeffs().coeffRef(1); } + EIGEN_DEVICE_FUNC inline Scalar& y() { return this->derived().coeffs().coeffRef(1); } /** \returns a reference to the \c z coefficient */ - inline Scalar& z() { return this->derived().coeffs().coeffRef(2); } + EIGEN_DEVICE_FUNC inline Scalar& z() { return this->derived().coeffs().coeffRef(2); } /** \returns a reference to the \c w coefficient */ - inline Scalar& w() { return this->derived().coeffs().coeffRef(3); } + EIGEN_DEVICE_FUNC inline Scalar& w() { return this->derived().coeffs().coeffRef(3); } /** \returns a read-only vector expression of the imaginary part (x,y,z) */ - inline const VectorBlock vec() const { return coeffs().template head<3>(); } + EIGEN_DEVICE_FUNC inline const VectorBlock vec() const { return coeffs().template head<3>(); } /** \returns a vector expression of the imaginary part (x,y,z) */ - inline VectorBlock vec() { return coeffs().template head<3>(); } + EIGEN_DEVICE_FUNC inline VectorBlock vec() { return coeffs().template head<3>(); } /** \returns a read-only vector expression of the coefficients (x,y,z,w) */ - inline const typename internal::traits::Coefficients& coeffs() const { return derived().coeffs(); } + EIGEN_DEVICE_FUNC inline const typename internal::traits::Coefficients& coeffs() const { return derived().coeffs(); } /** \returns a vector expression of the coefficients (x,y,z,w) */ - inline typename internal::traits::Coefficients& coeffs() { return derived().coeffs(); } + EIGEN_DEVICE_FUNC inline typename internal::traits::Coefficients& coeffs() { return derived().coeffs(); } - EIGEN_STRONG_INLINE QuaternionBase& operator=(const QuaternionBase& other); - template EIGEN_STRONG_INLINE Derived& operator=(const QuaternionBase& other); + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 
QuaternionBase& operator=(const QuaternionBase& other); + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const QuaternionBase& other); // disabled this copy operator as it is giving very strange compilation errors when compiling // test_stdvector with GCC 4.4.2. This looks like a GCC bug though, so feel free to re-enable it if it's @@ -97,72 +97,72 @@ class QuaternionBase : public RotationBase // Derived& operator=(const QuaternionBase& other) // { return operator=(other); } - Derived& operator=(const AngleAxisType& aa); - template Derived& operator=(const MatrixBase& m); + EIGEN_DEVICE_FUNC Derived& operator=(const AngleAxisType& aa); + template EIGEN_DEVICE_FUNC Derived& operator=(const MatrixBase& m); /** \returns a quaternion representing an identity rotation * \sa MatrixBase::Identity() */ - static inline Quaternion Identity() { return Quaternion(Scalar(1), Scalar(0), Scalar(0), Scalar(0)); } + EIGEN_DEVICE_FUNC static inline Quaternion Identity() { return Quaternion(Scalar(1), Scalar(0), Scalar(0), Scalar(0)); } /** \sa QuaternionBase::Identity(), MatrixBase::setIdentity() */ - inline QuaternionBase& setIdentity() { coeffs() << Scalar(0), Scalar(0), Scalar(0), Scalar(1); return *this; } + EIGEN_DEVICE_FUNC inline QuaternionBase& setIdentity() { coeffs() << Scalar(0), Scalar(0), Scalar(0), Scalar(1); return *this; } /** \returns the squared norm of the quaternion's coefficients * \sa QuaternionBase::norm(), MatrixBase::squaredNorm() */ - inline Scalar squaredNorm() const { return coeffs().squaredNorm(); } + EIGEN_DEVICE_FUNC inline Scalar squaredNorm() const { return coeffs().squaredNorm(); } /** \returns the norm of the quaternion's coefficients * \sa QuaternionBase::squaredNorm(), MatrixBase::norm() */ - inline Scalar norm() const { return coeffs().norm(); } + EIGEN_DEVICE_FUNC inline Scalar norm() const { return coeffs().norm(); } /** Normalizes the quaternion \c *this * \sa normalized(), MatrixBase::normalize() */ - inline void 
normalize() { coeffs().normalize(); } + EIGEN_DEVICE_FUNC inline void normalize() { coeffs().normalize(); } /** \returns a normalized copy of \c *this * \sa normalize(), MatrixBase::normalized() */ - inline Quaternion normalized() const { return Quaternion(coeffs().normalized()); } + EIGEN_DEVICE_FUNC inline Quaternion normalized() const { return Quaternion(coeffs().normalized()); } /** \returns the dot product of \c *this and \a other * Geometrically speaking, the dot product of two unit quaternions * corresponds to the cosine of half the angle between the two rotations. * \sa angularDistance() */ - template inline Scalar dot(const QuaternionBase& other) const { return coeffs().dot(other.coeffs()); } + template EIGEN_DEVICE_FUNC inline Scalar dot(const QuaternionBase& other) const { return coeffs().dot(other.coeffs()); } - template Scalar angularDistance(const QuaternionBase& other) const; + template EIGEN_DEVICE_FUNC Scalar angularDistance(const QuaternionBase& other) const; /** \returns an equivalent 3x3 rotation matrix */ - Matrix3 toRotationMatrix() const; + EIGEN_DEVICE_FUNC Matrix3 toRotationMatrix() const; /** \returns the quaternion which transform \a a into \a b through a rotation */ template - Derived& setFromTwoVectors(const MatrixBase& a, const MatrixBase& b); + EIGEN_DEVICE_FUNC Derived& setFromTwoVectors(const MatrixBase& a, const MatrixBase& b); - template EIGEN_STRONG_INLINE Quaternion operator* (const QuaternionBase& q) const; - template EIGEN_STRONG_INLINE Derived& operator*= (const QuaternionBase& q); + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion operator* (const QuaternionBase& q) const; + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*= (const QuaternionBase& q); /** \returns the quaternion describing the inverse rotation */ - Quaternion inverse() const; + EIGEN_DEVICE_FUNC Quaternion inverse() const; /** \returns the conjugated quaternion */ - Quaternion conjugate() const; + EIGEN_DEVICE_FUNC Quaternion 
conjugate() const; - template Quaternion slerp(const Scalar& t, const QuaternionBase& other) const; + template EIGEN_DEVICE_FUNC Quaternion slerp(const Scalar& t, const QuaternionBase& other) const; /** \returns \c true if \c *this is approximately equal to \a other, within the precision * determined by \a prec. * * \sa MatrixBase::isApprox() */ template - bool isApprox(const QuaternionBase& other, const RealScalar& prec = NumTraits::dummy_precision()) const + EIGEN_DEVICE_FUNC bool isApprox(const QuaternionBase& other, const RealScalar& prec = NumTraits::dummy_precision()) const { return coeffs().isApprox(other.coeffs(), prec); } /** return the result vector of \a v through the rotation*/ - EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Vector3 _transformVector(const Vector3& v) const; /** \returns \c *this with scalar type casted to \a NewScalarType * @@ -170,7 +170,7 @@ class QuaternionBase : public RotationBase * then this function smartly returns a const reference to \c *this. */ template - inline typename internal::cast_return_type >::type cast() const + EIGEN_DEVICE_FUNC inline typename internal::cast_return_type >::type cast() const { return typename internal::cast_return_type >::type(derived()); } @@ -239,7 +239,7 @@ public: typedef typename Base::AngleAxisType AngleAxisType; /** Default constructor leaving the quaternion uninitialized. */ - inline Quaternion() {} + EIGEN_DEVICE_FUNC inline Quaternion() {} /** Constructs and initializes the quaternion \f$ w+xi+yj+zk \f$ from * its four coefficients \a w, \a x, \a y and \a z. 
@@ -248,36 +248,36 @@ public: * while internally the coefficients are stored in the following order: * [\c x, \c y, \c z, \c w] */ - inline Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar& z) : m_coeffs(x, y, z, w){} + EIGEN_DEVICE_FUNC inline Quaternion(const Scalar& w, const Scalar& x, const Scalar& y, const Scalar& z) : m_coeffs(x, y, z, w){} /** Constructs and initialize a quaternion from the array data */ - explicit inline Quaternion(const Scalar* data) : m_coeffs(data) {} + EIGEN_DEVICE_FUNC explicit inline Quaternion(const Scalar* data) : m_coeffs(data) {} /** Copy constructor */ - template EIGEN_STRONG_INLINE Quaternion(const QuaternionBase& other) { this->Base::operator=(other); } + template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion(const QuaternionBase& other) { this->Base::operator=(other); } /** Constructs and initializes a quaternion from the angle-axis \a aa */ - explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; } + EIGEN_DEVICE_FUNC explicit inline Quaternion(const AngleAxisType& aa) { *this = aa; } /** Constructs and initializes a quaternion from either: * - a rotation matrix expression, * - a 4D vector expression representing quaternion coefficients. 
*/ template - explicit inline Quaternion(const MatrixBase& other) { *this = other; } + EIGEN_DEVICE_FUNC explicit inline Quaternion(const MatrixBase& other) { *this = other; } /** Explicit copy constructor with scalar conversion */ template - explicit inline Quaternion(const Quaternion& other) + EIGEN_DEVICE_FUNC explicit inline Quaternion(const Quaternion& other) { m_coeffs = other.coeffs().template cast(); } - static Quaternion UnitRandom(); + EIGEN_DEVICE_FUNC static Quaternion UnitRandom(); template - static Quaternion FromTwoVectors(const MatrixBase& a, const MatrixBase& b); + EIGEN_DEVICE_FUNC static Quaternion FromTwoVectors(const MatrixBase& a, const MatrixBase& b); - inline Coefficients& coeffs() { return m_coeffs;} - inline const Coefficients& coeffs() const { return m_coeffs;} + EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs;} + EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs;} EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool(NeedsAlignment)) @@ -357,9 +357,9 @@ class Map, _Options > * \code *coeffs == {x, y, z, w} \endcode * * If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. */ - explicit EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {} + EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Map(const Scalar* coeffs) : m_coeffs(coeffs) {} - inline const Coefficients& coeffs() const { return m_coeffs;} + EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs;} protected: const Coefficients m_coeffs; @@ -394,10 +394,10 @@ class Map, _Options > * \code *coeffs == {x, y, z, w} \endcode * * If the template parameter _Options is set to #Aligned, then the pointer coeffs must be aligned. 
*/ - explicit EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {} + EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Map(Scalar* coeffs) : m_coeffs(coeffs) {} - inline Coefficients& coeffs() { return m_coeffs; } - inline const Coefficients& coeffs() const { return m_coeffs; } + EIGEN_DEVICE_FUNC inline Coefficients& coeffs() { return m_coeffs; } + EIGEN_DEVICE_FUNC inline const Coefficients& coeffs() const { return m_coeffs; } protected: Coefficients m_coeffs; @@ -425,7 +425,7 @@ typedef Map, Aligned> QuaternionMapAlignedd; namespace internal { template struct quat_product { - static EIGEN_STRONG_INLINE Quaternion run(const QuaternionBase& a, const QuaternionBase& b){ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion run(const QuaternionBase& a, const QuaternionBase& b){ return Quaternion ( a.w() * b.w() - a.x() * b.x() - a.y() * b.y() - a.z() * b.z(), @@ -440,7 +440,7 @@ template template -EIGEN_STRONG_INLINE Quaternion::Scalar> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Quaternion::Scalar> QuaternionBase::operator* (const QuaternionBase& other) const { EIGEN_STATIC_ASSERT((internal::is_same::value), @@ -453,7 +453,7 @@ QuaternionBase::operator* (const QuaternionBase& other) c /** \sa operator*(Quaternion) */ template template -EIGEN_STRONG_INLINE Derived& QuaternionBase::operator*= (const QuaternionBase& other) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase::operator*= (const QuaternionBase& other) { derived() = derived() * other.derived(); return derived(); @@ -467,7 +467,7 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase::operator*= (const Quaterni * - Via a Matrix3: 24 + 15n */ template -EIGEN_STRONG_INLINE typename QuaternionBase::Vector3 +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename QuaternionBase::Vector3 QuaternionBase::_transformVector(const Vector3& v) const { // Note that this algorithm comes from the optimization by hand @@ -481,7 +481,7 @@ QuaternionBase::_transformVector(const Vector3& v) const } template 
-EIGEN_STRONG_INLINE QuaternionBase& QuaternionBase::operator=(const QuaternionBase& other) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE QuaternionBase& QuaternionBase::operator=(const QuaternionBase& other) { coeffs() = other.coeffs(); return derived(); @@ -489,7 +489,7 @@ EIGEN_STRONG_INLINE QuaternionBase& QuaternionBase::operator=( template template -EIGEN_STRONG_INLINE Derived& QuaternionBase::operator=(const QuaternionBase& other) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase::operator=(const QuaternionBase& other) { coeffs() = other.coeffs(); return derived(); @@ -498,10 +498,10 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase::operator=(const Quaternion /** Set \c *this from an angle-axis \a aa and returns a reference to \c *this */ template -EIGEN_STRONG_INLINE Derived& QuaternionBase::operator=(const AngleAxisType& aa) +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& QuaternionBase::operator=(const AngleAxisType& aa) { - using std::cos; - using std::sin; + EIGEN_USING_STD_MATH(cos) + EIGEN_USING_STD_MATH(sin) Scalar ha = Scalar(0.5)*aa.angle(); // Scalar(0.5) to suppress precision loss warnings this->w() = cos(ha); this->vec() = sin(ha) * aa.axis(); @@ -516,7 +516,7 @@ EIGEN_STRONG_INLINE Derived& QuaternionBase::operator=(const AngleAxisT template template -inline Derived& QuaternionBase::operator=(const MatrixBase& xpr) +EIGEN_DEVICE_FUNC inline Derived& QuaternionBase::operator=(const MatrixBase& xpr) { EIGEN_STATIC_ASSERT((internal::is_same::value), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) @@ -528,7 +528,7 @@ inline Derived& QuaternionBase::operator=(const MatrixBase -inline typename QuaternionBase::Matrix3 +EIGEN_DEVICE_FUNC inline typename QuaternionBase::Matrix3 QuaternionBase::toRotationMatrix(void) const { // NOTE if inlined, then gcc 4.2 and 4.4 get rid of the temporary (not gcc 4.3 !!) 
@@ -575,9 +575,9 @@ QuaternionBase::toRotationMatrix(void) const */ template template -inline Derived& QuaternionBase::setFromTwoVectors(const MatrixBase& a, const MatrixBase& b) +EIGEN_DEVICE_FUNC inline Derived& QuaternionBase::setFromTwoVectors(const MatrixBase& a, const MatrixBase& b) { - using std::sqrt; + EIGEN_USING_STD_MATH(sqrt) Vector3 v0 = a.normalized(); Vector3 v1 = b.normalized(); Scalar c = v1.dot(v0); @@ -616,11 +616,11 @@ inline Derived& QuaternionBase::setFromTwoVectors(const MatrixBase -Quaternion Quaternion::UnitRandom() +EIGEN_DEVICE_FUNC Quaternion Quaternion::UnitRandom() { - using std::sqrt; - using std::sin; - using std::cos; + EIGEN_USING_STD_MATH(sqrt) + EIGEN_USING_STD_MATH(sin) + EIGEN_USING_STD_MATH(cos) const Scalar u1 = internal::random(0, 1), u2 = internal::random(0, 2*EIGEN_PI), u3 = internal::random(0, 2*EIGEN_PI); @@ -642,7 +642,7 @@ Quaternion Quaternion::UnitRandom() */ template template -Quaternion Quaternion::FromTwoVectors(const MatrixBase& a, const MatrixBase& b) +EIGEN_DEVICE_FUNC Quaternion Quaternion::FromTwoVectors(const MatrixBase& a, const MatrixBase& b) { Quaternion quat; quat.setFromTwoVectors(a, b); @@ -657,7 +657,7 @@ Quaternion Quaternion::FromTwoVectors(const Matr * \sa QuaternionBase::conjugate() */ template -inline Quaternion::Scalar> QuaternionBase::inverse() const +EIGEN_DEVICE_FUNC inline Quaternion::Scalar> QuaternionBase::inverse() const { // FIXME should this function be called multiplicativeInverse and conjugate() be called inverse() or opposite() ?? 
Scalar n2 = this->squaredNorm(); @@ -674,7 +674,7 @@ inline Quaternion::Scalar> QuaternionBase struct quat_conj { - static EIGEN_STRONG_INLINE Quaternion run(const QuaternionBase& q){ + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Quaternion run(const QuaternionBase& q){ return Quaternion(q.w(),-q.x(),-q.y(),-q.z()); } }; @@ -687,7 +687,7 @@ template struct quat_con * \sa Quaternion2::inverse() */ template -inline Quaternion::Scalar> +EIGEN_DEVICE_FUNC inline Quaternion::Scalar> QuaternionBase::conjugate() const { return internal::quat_conj::conjugate() const */ template template -inline typename internal::traits::Scalar +EIGEN_DEVICE_FUNC inline typename internal::traits::Scalar QuaternionBase::angularDistance(const QuaternionBase& other) const { - using std::atan2; - using std::abs; + EIGEN_USING_STD_MATH(atan2) + EIGEN_USING_STD_MATH(abs) Quaternion d = (*this) * other.conjugate(); return Scalar(2) * atan2( d.vec().norm(), abs(d.w()) ); } @@ -720,12 +720,12 @@ QuaternionBase::angularDistance(const QuaternionBase& oth */ template template -Quaternion::Scalar> +EIGEN_DEVICE_FUNC Quaternion::Scalar> QuaternionBase::slerp(const Scalar& t, const QuaternionBase& other) const { - using std::acos; - using std::sin; - using std::abs; + EIGEN_USING_STD_MATH(acos) + EIGEN_USING_STD_MATH(sin) + EIGEN_USING_STD_MATH(abs) const Scalar one = Scalar(1) - NumTraits::epsilon(); Scalar d = this->dot(other); Scalar absD = abs(d); @@ -759,10 +759,10 @@ template struct quaternionbase_assign_impl { typedef typename Other::Scalar Scalar; - template static inline void run(QuaternionBase& q, const Other& a_mat) + template EIGEN_DEVICE_FUNC static inline void run(QuaternionBase& q, const Other& a_mat) { const typename internal::nested_eval::type mat(a_mat); - using std::sqrt; + EIGEN_USING_STD_MATH(sqrt) // This algorithm comes from "Quaternion Calculus and Fast Animation", // Ken Shoemake, 1987 SIGGRAPH course notes Scalar t = mat.trace(); @@ -800,7 +800,7 @@ template struct 
quaternionbase_assign_impl { typedef typename Other::Scalar Scalar; - template static inline void run(QuaternionBase& q, const Other& vec) + template EIGEN_DEVICE_FUNC static inline void run(QuaternionBase& q, const Other& vec) { q.coeffs() = vec; } diff --git a/Eigen/src/Geometry/Rotation2D.h b/Eigen/src/Geometry/Rotation2D.h index b42a7df70..884b7d0ee 100644 --- a/Eigen/src/Geometry/Rotation2D.h +++ b/Eigen/src/Geometry/Rotation2D.h @@ -59,35 +59,35 @@ protected: public: /** Construct a 2D counter clock wise rotation from the angle \a a in radian. */ - explicit inline Rotation2D(const Scalar& a) : m_angle(a) {} + EIGEN_DEVICE_FUNC explicit inline Rotation2D(const Scalar& a) : m_angle(a) {} /** Default constructor wihtout initialization. The represented rotation is undefined. */ - Rotation2D() {} + EIGEN_DEVICE_FUNC Rotation2D() {} /** Construct a 2D rotation from a 2x2 rotation matrix \a mat. * * \sa fromRotationMatrix() */ template - explicit Rotation2D(const MatrixBase& m) + EIGEN_DEVICE_FUNC explicit Rotation2D(const MatrixBase& m) { fromRotationMatrix(m.derived()); } /** \returns the rotation angle */ - inline Scalar angle() const { return m_angle; } + EIGEN_DEVICE_FUNC inline Scalar angle() const { return m_angle; } /** \returns a read-write reference to the rotation angle */ - inline Scalar& angle() { return m_angle; } + EIGEN_DEVICE_FUNC inline Scalar& angle() { return m_angle; } /** \returns the rotation angle in [0,2pi] */ - inline Scalar smallestPositiveAngle() const { + EIGEN_DEVICE_FUNC inline Scalar smallestPositiveAngle() const { Scalar tmp = numext::fmod(m_angle,Scalar(2*EIGEN_PI)); return tmpScalar(EIGEN_PI)) tmp -= Scalar(2*EIGEN_PI); else if(tmp<-Scalar(EIGEN_PI)) tmp += Scalar(2*EIGEN_PI); @@ -95,23 +95,23 @@ public: } /** \returns the inverse rotation */ - inline Rotation2D inverse() const { return Rotation2D(-m_angle); } + EIGEN_DEVICE_FUNC inline Rotation2D inverse() const { return Rotation2D(-m_angle); } /** Concatenates two rotations */ - 
inline Rotation2D operator*(const Rotation2D& other) const + EIGEN_DEVICE_FUNC inline Rotation2D operator*(const Rotation2D& other) const { return Rotation2D(m_angle + other.m_angle); } /** Concatenates two rotations */ - inline Rotation2D& operator*=(const Rotation2D& other) + EIGEN_DEVICE_FUNC inline Rotation2D& operator*=(const Rotation2D& other) { m_angle += other.m_angle; return *this; } /** Applies the rotation to a 2D vector */ - Vector2 operator* (const Vector2& vec) const + EIGEN_DEVICE_FUNC Vector2 operator* (const Vector2& vec) const { return toRotationMatrix() * vec; } template - Rotation2D& fromRotationMatrix(const MatrixBase& m); - Matrix2 toRotationMatrix() const; + EIGEN_DEVICE_FUNC Rotation2D& fromRotationMatrix(const MatrixBase& m); + EIGEN_DEVICE_FUNC Matrix2 toRotationMatrix() const; /** Set \c *this from a 2x2 rotation matrix \a mat. * In other words, this function extract the rotation angle from the rotation matrix. @@ -121,13 +121,13 @@ public: * \sa fromRotationMatrix() */ template - Rotation2D& operator=(const MatrixBase& m) + EIGEN_DEVICE_FUNC Rotation2D& operator=(const MatrixBase& m) { return fromRotationMatrix(m.derived()); } /** \returns the spherical interpolation between \c *this and \a other using * parameter \a t. It is in fact equivalent to a linear interpolation. */ - inline Rotation2D slerp(const Scalar& t, const Rotation2D& other) const + EIGEN_DEVICE_FUNC inline Rotation2D slerp(const Scalar& t, const Rotation2D& other) const { Scalar dist = Rotation2D(other.m_angle-m_angle).smallestAngle(); return Rotation2D(m_angle + dist*t); @@ -139,23 +139,23 @@ public: * then this function smartly returns a const reference to \c *this. 
*/ template - inline typename internal::cast_return_type >::type cast() const + EIGEN_DEVICE_FUNC inline typename internal::cast_return_type >::type cast() const { return typename internal::cast_return_type >::type(*this); } /** Copy constructor with scalar type conversion */ template - inline explicit Rotation2D(const Rotation2D& other) + EIGEN_DEVICE_FUNC inline explicit Rotation2D(const Rotation2D& other) { m_angle = Scalar(other.angle()); } - static inline Rotation2D Identity() { return Rotation2D(0); } + EIGEN_DEVICE_FUNC static inline Rotation2D Identity() { return Rotation2D(0); } /** \returns \c true if \c *this is approximately equal to \a other, within the precision * determined by \a prec. * * \sa MatrixBase::isApprox() */ - bool isApprox(const Rotation2D& other, const typename NumTraits::Real& prec = NumTraits::dummy_precision()) const + EIGEN_DEVICE_FUNC bool isApprox(const Rotation2D& other, const typename NumTraits::Real& prec = NumTraits::dummy_precision()) const { return internal::isApprox(m_angle,other.m_angle, prec); } }; @@ -173,9 +173,9 @@ typedef Rotation2D Rotation2Dd; */ template template -Rotation2D& Rotation2D::fromRotationMatrix(const MatrixBase& mat) +EIGEN_DEVICE_FUNC Rotation2D& Rotation2D::fromRotationMatrix(const MatrixBase& mat) { - using std::atan2; + EIGEN_USING_STD_MATH(atan2) EIGEN_STATIC_ASSERT(Derived::RowsAtCompileTime==2 && Derived::ColsAtCompileTime==2,YOU_MADE_A_PROGRAMMING_MISTAKE) m_angle = atan2(mat.coeff(1,0), mat.coeff(0,0)); return *this; @@ -185,10 +185,10 @@ Rotation2D& Rotation2D::fromRotationMatrix(const MatrixBase typename Rotation2D::Matrix2 -Rotation2D::toRotationMatrix(void) const +EIGEN_DEVICE_FUNC Rotation2D::toRotationMatrix(void) const { - using std::sin; - using std::cos; + EIGEN_USING_STD_MATH(sin) + EIGEN_USING_STD_MATH(cos) Scalar sinA = sin(m_angle); Scalar cosA = cos(m_angle); return (Matrix2() << cosA, -sinA, sinA, cosA).finished(); diff --git a/Eigen/src/Geometry/RotationBase.h 
b/Eigen/src/Geometry/RotationBase.h index fadfd9151..f0ee0bd03 100644 --- a/Eigen/src/Geometry/RotationBase.h +++ b/Eigen/src/Geometry/RotationBase.h @@ -38,26 +38,26 @@ class RotationBase typedef Matrix VectorType; public: - inline const Derived& derived() const { return *static_cast(this); } - inline Derived& derived() { return *static_cast(this); } + EIGEN_DEVICE_FUNC inline const Derived& derived() const { return *static_cast(this); } + EIGEN_DEVICE_FUNC inline Derived& derived() { return *static_cast(this); } /** \returns an equivalent rotation matrix */ - inline RotationMatrixType toRotationMatrix() const { return derived().toRotationMatrix(); } + EIGEN_DEVICE_FUNC inline RotationMatrixType toRotationMatrix() const { return derived().toRotationMatrix(); } /** \returns an equivalent rotation matrix * This function is added to be conform with the Transform class' naming scheme. */ - inline RotationMatrixType matrix() const { return derived().toRotationMatrix(); } + EIGEN_DEVICE_FUNC inline RotationMatrixType matrix() const { return derived().toRotationMatrix(); } /** \returns the inverse rotation */ - inline Derived inverse() const { return derived().inverse(); } + EIGEN_DEVICE_FUNC inline Derived inverse() const { return derived().inverse(); } /** \returns the concatenation of the rotation \c *this with a translation \a t */ - inline Transform operator*(const Translation& t) const + EIGEN_DEVICE_FUNC inline Transform operator*(const Translation& t) const { return Transform(*this) * t; } /** \returns the concatenation of the rotation \c *this with a uniform scaling \a s */ - inline RotationMatrixType operator*(const UniformScaling& s) const + EIGEN_DEVICE_FUNC inline RotationMatrixType operator*(const UniformScaling& s) const { return toRotationMatrix() * s.factor(); } /** \returns the concatenation of the rotation \c *this with a generic expression \a e @@ -67,17 +67,17 @@ class RotationBase * - a vector of size Dim */ template - EIGEN_STRONG_INLINE typename 
internal::rotation_base_generic_product_selector::ReturnType + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::rotation_base_generic_product_selector::ReturnType operator*(const EigenBase& e) const { return internal::rotation_base_generic_product_selector::run(derived(), e.derived()); } /** \returns the concatenation of a linear transformation \a l with the rotation \a r */ template friend - inline RotationMatrixType operator*(const EigenBase& l, const Derived& r) + EIGEN_DEVICE_FUNC inline RotationMatrixType operator*(const EigenBase& l, const Derived& r) { return l.derived() * r.toRotationMatrix(); } /** \returns the concatenation of a scaling \a l with the rotation \a r */ - friend inline Transform operator*(const DiagonalMatrix& l, const Derived& r) + EIGEN_DEVICE_FUNC friend inline Transform operator*(const DiagonalMatrix& l, const Derived& r) { Transform res(r); res.linear().applyOnTheLeft(l); @@ -86,11 +86,11 @@ class RotationBase /** \returns the concatenation of the rotation \c *this with a transformation \a t */ template - inline Transform operator*(const Transform& t) const + EIGEN_DEVICE_FUNC inline Transform operator*(const Transform& t) const { return toRotationMatrix() * t; } template - inline VectorType _transformVector(const OtherVectorType& v) const + EIGEN_DEVICE_FUNC inline VectorType _transformVector(const OtherVectorType& v) const { return toRotationMatrix() * v; } }; @@ -102,7 +102,7 @@ struct rotation_base_generic_product_selector { enum { Dim = RotationDerived::Dim }; typedef Matrix ReturnType; - static inline ReturnType run(const RotationDerived& r, const MatrixType& m) + EIGEN_DEVICE_FUNC static inline ReturnType run(const RotationDerived& r, const MatrixType& m) { return r.toRotationMatrix() * m; } }; @@ -110,7 +110,7 @@ template struct rotation_base_generic_product_selector< RotationDerived, DiagonalMatrix, false > { typedef Transform ReturnType; - static inline ReturnType run(const RotationDerived& r, const DiagonalMatrix& m) 
+ EIGEN_DEVICE_FUNC static inline ReturnType run(const RotationDerived& r, const DiagonalMatrix& m) { ReturnType res(r); res.linear() *= m; @@ -123,7 +123,7 @@ struct rotation_base_generic_product_selector ReturnType; - static EIGEN_STRONG_INLINE ReturnType run(const RotationDerived& r, const OtherVectorType& v) + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE ReturnType run(const RotationDerived& r, const OtherVectorType& v) { return r._transformVector(v); } @@ -137,7 +137,7 @@ struct rotation_base_generic_product_selector template -Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols> +EIGEN_DEVICE_FUNC Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols> ::Matrix(const RotationBase& r) { EIGEN_STATIC_ASSERT_MATRIX_SPECIFIC_SIZE(Matrix,int(OtherDerived::Dim),int(OtherDerived::Dim)) @@ -150,7 +150,7 @@ Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols> */ template template -Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>& +EIGEN_DEVICE_FUNC Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols>& Matrix<_Scalar, _Rows, _Cols, _Storage, _MaxRows, _MaxCols> ::operator=(const RotationBase& r) { @@ -179,20 +179,20 @@ namespace internal { * \sa class Transform, class Rotation2D, class Quaternion, class AngleAxis */ template -static inline Matrix toRotationMatrix(const Scalar& s) +EIGEN_DEVICE_FUNC static inline Matrix toRotationMatrix(const Scalar& s) { EIGEN_STATIC_ASSERT(Dim==2,YOU_MADE_A_PROGRAMMING_MISTAKE) return Rotation2D(s).toRotationMatrix(); } template -static inline Matrix toRotationMatrix(const RotationBase& r) +EIGEN_DEVICE_FUNC static inline Matrix toRotationMatrix(const RotationBase& r) { return r.toRotationMatrix(); } template -static inline const MatrixBase& toRotationMatrix(const MatrixBase& mat) +EIGEN_DEVICE_FUNC static inline const MatrixBase& toRotationMatrix(const MatrixBase& mat) { EIGEN_STATIC_ASSERT(OtherDerived::RowsAtCompileTime==Dim && OtherDerived::ColsAtCompileTime==Dim, YOU_MADE_A_PROGRAMMING_MISTAKE) 
diff --git a/Eigen/src/Geometry/Transform.h b/Eigen/src/Geometry/Transform.h index 8f6c62d63..3f31ee45d 100644 --- a/Eigen/src/Geometry/Transform.h +++ b/Eigen/src/Geometry/Transform.h @@ -253,43 +253,43 @@ public: /** Default constructor without initialization of the meaningful coefficients. * If Mode==Affine, then the last row is set to [0 ... 0 1] */ - inline Transform() + EIGEN_DEVICE_FUNC inline Transform() { check_template_params(); internal::transform_make_affine<(int(Mode)==Affine) ? Affine : AffineCompact>::run(m_matrix); } - inline Transform(const Transform& other) + EIGEN_DEVICE_FUNC inline Transform(const Transform& other) { check_template_params(); m_matrix = other.m_matrix; } - inline explicit Transform(const TranslationType& t) + EIGEN_DEVICE_FUNC inline explicit Transform(const TranslationType& t) { check_template_params(); *this = t; } - inline explicit Transform(const UniformScaling& s) + EIGEN_DEVICE_FUNC inline explicit Transform(const UniformScaling& s) { check_template_params(); *this = s; } template - inline explicit Transform(const RotationBase& r) + EIGEN_DEVICE_FUNC inline explicit Transform(const RotationBase& r) { check_template_params(); *this = r; } - inline Transform& operator=(const Transform& other) + EIGEN_DEVICE_FUNC inline Transform& operator=(const Transform& other) { m_matrix = other.m_matrix; return *this; } typedef internal::transform_take_affine_part take_affine_part; /** Constructs and initializes a transformation from a Dim^2 or a (Dim+1)^2 matrix. */ template - inline explicit Transform(const EigenBase& other) + EIGEN_DEVICE_FUNC inline explicit Transform(const EigenBase& other) { EIGEN_STATIC_ASSERT((internal::is_same::value), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY); @@ -300,7 +300,7 @@ public: /** Set \c *this from a Dim^2 or (Dim+1)^2 matrix. 
*/ template - inline Transform& operator=(const EigenBase& other) + EIGEN_DEVICE_FUNC inline Transform& operator=(const EigenBase& other) { EIGEN_STATIC_ASSERT((internal::is_same::value), YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY); @@ -310,7 +310,7 @@ public: } template - inline Transform(const Transform& other) + EIGEN_DEVICE_FUNC inline Transform(const Transform& other) { check_template_params(); // only the options change, we can directly copy the matrices @@ -318,7 +318,7 @@ public: } template - inline Transform(const Transform& other) + EIGEN_DEVICE_FUNC inline Transform(const Transform& other) { check_template_params(); // prevent conversions as: @@ -359,14 +359,14 @@ public: } template - Transform(const ReturnByValue& other) + EIGEN_DEVICE_FUNC Transform(const ReturnByValue& other) { check_template_params(); other.evalTo(*this); } template - Transform& operator=(const ReturnByValue& other) + EIGEN_DEVICE_FUNC Transform& operator=(const ReturnByValue& other) { other.evalTo(*this); return *this; @@ -381,35 +381,35 @@ public: inline QTransform toQTransform(void) const; #endif - Index rows() const { return int(Mode)==int(Projective) ? m_matrix.cols() : (m_matrix.cols()-1); } - Index cols() const { return m_matrix.cols(); } + EIGEN_DEVICE_FUNC Index rows() const { return int(Mode)==int(Projective) ? 
m_matrix.cols() : (m_matrix.cols()-1); } + EIGEN_DEVICE_FUNC Index cols() const { return m_matrix.cols(); } /** shortcut for m_matrix(row,col); * \sa MatrixBase::operator(Index,Index) const */ - inline Scalar operator() (Index row, Index col) const { return m_matrix(row,col); } + EIGEN_DEVICE_FUNC inline Scalar operator() (Index row, Index col) const { return m_matrix(row,col); } /** shortcut for m_matrix(row,col); * \sa MatrixBase::operator(Index,Index) */ - inline Scalar& operator() (Index row, Index col) { return m_matrix(row,col); } + EIGEN_DEVICE_FUNC inline Scalar& operator() (Index row, Index col) { return m_matrix(row,col); } /** \returns a read-only expression of the transformation matrix */ - inline const MatrixType& matrix() const { return m_matrix; } + EIGEN_DEVICE_FUNC inline const MatrixType& matrix() const { return m_matrix; } /** \returns a writable expression of the transformation matrix */ - inline MatrixType& matrix() { return m_matrix; } + EIGEN_DEVICE_FUNC inline MatrixType& matrix() { return m_matrix; } /** \returns a read-only expression of the linear part of the transformation */ - inline ConstLinearPart linear() const { return ConstLinearPart(m_matrix,0,0); } + EIGEN_DEVICE_FUNC inline ConstLinearPart linear() const { return ConstLinearPart(m_matrix,0,0); } /** \returns a writable expression of the linear part of the transformation */ - inline LinearPart linear() { return LinearPart(m_matrix,0,0); } + EIGEN_DEVICE_FUNC inline LinearPart linear() { return LinearPart(m_matrix,0,0); } /** \returns a read-only expression of the Dim x HDim affine part of the transformation */ - inline ConstAffinePart affine() const { return take_affine_part::run(m_matrix); } + EIGEN_DEVICE_FUNC inline ConstAffinePart affine() const { return take_affine_part::run(m_matrix); } /** \returns a writable expression of the Dim x HDim affine part of the transformation */ - inline AffinePart affine() { return take_affine_part::run(m_matrix); } + EIGEN_DEVICE_FUNC inline 
AffinePart affine() { return take_affine_part::run(m_matrix); } /** \returns a read-only expression of the translation vector of the transformation */ - inline ConstTranslationPart translation() const { return ConstTranslationPart(m_matrix,0,Dim); } + EIGEN_DEVICE_FUNC inline ConstTranslationPart translation() const { return ConstTranslationPart(m_matrix,0,Dim); } /** \returns a writable expression of the translation vector of the transformation */ - inline TranslationPart translation() { return TranslationPart(m_matrix,0,Dim); } + EIGEN_DEVICE_FUNC inline TranslationPart translation() { return TranslationPart(m_matrix,0,Dim); } /** \returns an expression of the product between the transform \c *this and a matrix expression \a other. * @@ -437,7 +437,7 @@ public: */ // note: this function is defined here because some compilers cannot find the respective declaration template - EIGEN_STRONG_INLINE const typename internal::transform_right_product_impl::ResultType + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename internal::transform_right_product_impl::ResultType operator * (const EigenBase &other) const { return internal::transform_right_product_impl::run(*this,other.derived()); } @@ -449,7 +449,7 @@ public: * \li a general transformation matrix of size Dim+1 x Dim+1. */ template friend - inline const typename internal::transform_left_product_impl::ResultType + EIGEN_DEVICE_FUNC inline const typename internal::transform_left_product_impl::ResultType operator * (const EigenBase &a, const Transform &b) { return internal::transform_left_product_impl::run(a.derived(),b); } @@ -460,7 +460,7 @@ public: * mode is no isometry. In that case, the returned transform is an affinity. */ template - inline const TransformTimeDiagonalReturnType + EIGEN_DEVICE_FUNC inline const TransformTimeDiagonalReturnType operator * (const DiagonalBase &b) const { TransformTimeDiagonalReturnType res(*this); @@ -475,7 +475,7 @@ public: * mode is no isometry. 
In that case, the returned transform is an affinity. */ template - friend inline TransformTimeDiagonalReturnType + EIGEN_DEVICE_FUNC friend inline TransformTimeDiagonalReturnType operator * (const DiagonalBase &a, const Transform &b) { TransformTimeDiagonalReturnType res; @@ -487,10 +487,10 @@ public: } template - inline Transform& operator*=(const EigenBase& other) { return *this = *this * other; } + EIGEN_DEVICE_FUNC inline Transform& operator*=(const EigenBase& other) { return *this = *this * other; } /** Concatenates two transformations */ - inline const Transform operator * (const Transform& other) const + EIGEN_DEVICE_FUNC inline const Transform operator * (const Transform& other) const { return internal::transform_transform_product_impl::run(*this,other); } @@ -522,7 +522,7 @@ public: #else /** Concatenates two different transformations */ template - inline typename internal::transform_transform_product_impl >::ResultType + EIGEN_DEVICE_FUNC inline typename internal::transform_transform_product_impl >::ResultType operator * (const Transform& other) const { return internal::transform_transform_product_impl >::run(*this,other); @@ -530,47 +530,61 @@ public: #endif /** \sa MatrixBase::setIdentity() */ - void setIdentity() { m_matrix.setIdentity(); } + EIGEN_DEVICE_FUNC void setIdentity() { m_matrix.setIdentity(); } /** * \brief Returns an identity transformation. * \todo In the future this function should be returning a Transform expression. 
*/ - static const Transform Identity() + EIGEN_DEVICE_FUNC static const Transform Identity() { return Transform(MatrixType::Identity()); } template + EIGEN_DEVICE_FUNC inline Transform& scale(const MatrixBase &other); template + EIGEN_DEVICE_FUNC inline Transform& prescale(const MatrixBase &other); - inline Transform& scale(const Scalar& s); - inline Transform& prescale(const Scalar& s); + EIGEN_DEVICE_FUNC inline Transform& scale(const Scalar& s); + EIGEN_DEVICE_FUNC inline Transform& prescale(const Scalar& s); template + EIGEN_DEVICE_FUNC inline Transform& translate(const MatrixBase &other); template + EIGEN_DEVICE_FUNC inline Transform& pretranslate(const MatrixBase &other); template + EIGEN_DEVICE_FUNC inline Transform& rotate(const RotationType& rotation); template + EIGEN_DEVICE_FUNC inline Transform& prerotate(const RotationType& rotation); - Transform& shear(const Scalar& sx, const Scalar& sy); - Transform& preshear(const Scalar& sx, const Scalar& sy); + EIGEN_DEVICE_FUNC Transform& shear(const Scalar& sx, const Scalar& sy); + EIGEN_DEVICE_FUNC Transform& preshear(const Scalar& sx, const Scalar& sy); - inline Transform& operator=(const TranslationType& t); + EIGEN_DEVICE_FUNC inline Transform& operator=(const TranslationType& t); + + EIGEN_DEVICE_FUNC inline Transform& operator*=(const TranslationType& t) { return translate(t.vector()); } - inline Transform operator*(const TranslationType& t) const; + + EIGEN_DEVICE_FUNC inline Transform operator*(const TranslationType& t) const; + EIGEN_DEVICE_FUNC inline Transform& operator=(const UniformScaling& t); + + EIGEN_DEVICE_FUNC inline Transform& operator*=(const UniformScaling& s) { return scale(s.factor()); } + + EIGEN_DEVICE_FUNC inline TransformTimeDiagonalReturnType operator*(const UniformScaling& s) const { TransformTimeDiagonalReturnType res = *this; @@ -578,31 +592,36 @@ public: return res; } + EIGEN_DEVICE_FUNC inline Transform& operator*=(const DiagonalMatrix& s) { linearExt() *= s; return *this; } 
template - inline Transform& operator=(const RotationBase& r); + EIGEN_DEVICE_FUNC inline Transform& operator=(const RotationBase& r); template - inline Transform& operator*=(const RotationBase& r) { return rotate(r.toRotationMatrix()); } + EIGEN_DEVICE_FUNC inline Transform& operator*=(const RotationBase& r) { return rotate(r.toRotationMatrix()); } template - inline Transform operator*(const RotationBase& r) const; + EIGEN_DEVICE_FUNC inline Transform operator*(const RotationBase& r) const; - const LinearMatrixType rotation() const; + EIGEN_DEVICE_FUNC const LinearMatrixType rotation() const; template + EIGEN_DEVICE_FUNC void computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const; template + EIGEN_DEVICE_FUNC void computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const; template + EIGEN_DEVICE_FUNC Transform& fromPositionOrientationScale(const MatrixBase &position, const OrientationType& orientation, const MatrixBase &scale); + EIGEN_DEVICE_FUNC inline Transform inverse(TransformTraits traits = (TransformTraits)Mode) const; /** \returns a const pointer to the column major internal matrix */ - const Scalar* data() const { return m_matrix.data(); } + EIGEN_DEVICE_FUNC const Scalar* data() const { return m_matrix.data(); } /** \returns a non-const pointer to the column major internal matrix */ - Scalar* data() { return m_matrix.data(); } + EIGEN_DEVICE_FUNC Scalar* data() { return m_matrix.data(); } /** \returns \c *this with scalar type casted to \a NewScalarType * @@ -610,12 +629,12 @@ public: * then this function smartly returns a const reference to \c *this. 
*/ template - inline typename internal::cast_return_type >::type cast() const + EIGEN_DEVICE_FUNC inline typename internal::cast_return_type >::type cast() const { return typename internal::cast_return_type >::type(*this); } /** Copy constructor with scalar type conversion */ template - inline explicit Transform(const Transform& other) + EIGEN_DEVICE_FUNC inline explicit Transform(const Transform& other) { check_template_params(); m_matrix = other.matrix().template cast(); @@ -625,12 +644,12 @@ public: * determined by \a prec. * * \sa MatrixBase::isApprox() */ - bool isApprox(const Transform& other, const typename NumTraits::Real& prec = NumTraits::dummy_precision()) const + EIGEN_DEVICE_FUNC bool isApprox(const Transform& other, const typename NumTraits::Real& prec = NumTraits::dummy_precision()) const { return m_matrix.isApprox(other.m_matrix, prec); } /** Sets the last row to [0 ... 0 1] */ - void makeAffine() + EIGEN_DEVICE_FUNC void makeAffine() { internal::transform_make_affine::run(m_matrix); } @@ -639,26 +658,26 @@ public: * \returns the Dim x Dim linear part if the transformation is affine, * and the HDim x Dim part for projective transformations. */ - inline Block linearExt() + EIGEN_DEVICE_FUNC inline Block linearExt() { return m_matrix.template block(0,0); } /** \internal * \returns the Dim x Dim linear part if the transformation is affine, * and the HDim x Dim part for projective transformations. */ - inline const Block linearExt() const + EIGEN_DEVICE_FUNC inline const Block linearExt() const { return m_matrix.template block(0,0); } /** \internal * \returns the translation part if the transformation is affine, * and the last column for projective transformations. */ - inline Block translationExt() + EIGEN_DEVICE_FUNC inline Block translationExt() { return m_matrix.template block(0,Dim); } /** \internal * \returns the translation part if the transformation is affine, * and the last column for projective transformations. 
*/ - inline const Block translationExt() const + EIGEN_DEVICE_FUNC inline const Block translationExt() const { return m_matrix.template block(0,Dim); } @@ -668,7 +687,7 @@ public: protected: #ifndef EIGEN_PARSED_BY_DOXYGEN - static EIGEN_STRONG_INLINE void check_template_params() + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void check_template_params() { EIGEN_STATIC_ASSERT((Options & (DontAlign|RowMajor)) == Options, INVALID_MATRIX_TEMPLATE_PARAMETERS) } @@ -821,7 +840,7 @@ QTransform Transform::toQTransform(void) const */ template template -Transform& +EIGEN_DEVICE_FUNC Transform& Transform::scale(const MatrixBase &other) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) @@ -835,7 +854,7 @@ Transform::scale(const MatrixBase &other) * \sa prescale(Scalar) */ template -inline Transform& Transform::scale(const Scalar& s) +EIGEN_DEVICE_FUNC inline Transform& Transform::scale(const Scalar& s) { EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS) linearExt() *= s; @@ -848,7 +867,7 @@ inline Transform& Transform::s */ template template -Transform& +EIGEN_DEVICE_FUNC Transform& Transform::prescale(const MatrixBase &other) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) @@ -862,7 +881,7 @@ Transform::prescale(const MatrixBase &oth * \sa scale(Scalar) */ template -inline Transform& Transform::prescale(const Scalar& s) +EIGEN_DEVICE_FUNC inline Transform& Transform::prescale(const Scalar& s) { EIGEN_STATIC_ASSERT(Mode!=int(Isometry), THIS_METHOD_IS_ONLY_FOR_SPECIFIC_TRANSFORMATIONS) m_matrix.template topRows() *= s; @@ -875,7 +894,7 @@ inline Transform& Transform::p */ template template -Transform& +EIGEN_DEVICE_FUNC Transform& Transform::translate(const MatrixBase &other) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) @@ -889,7 +908,7 @@ Transform::translate(const MatrixBase &ot */ template template -Transform& +EIGEN_DEVICE_FUNC Transform& Transform::pretranslate(const MatrixBase 
&other) { EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(OtherDerived,int(Dim)) @@ -919,7 +938,7 @@ Transform::pretranslate(const MatrixBase */ template template -Transform& +EIGEN_DEVICE_FUNC Transform& Transform::rotate(const RotationType& rotation) { linearExt() *= internal::toRotationMatrix(rotation); @@ -935,7 +954,7 @@ Transform::rotate(const RotationType& rotation) */ template template -Transform& +EIGEN_DEVICE_FUNC Transform& Transform::prerotate(const RotationType& rotation) { m_matrix.template block(0,0) = internal::toRotationMatrix(rotation) @@ -949,7 +968,7 @@ Transform::prerotate(const RotationType& rotation) * \sa preshear() */ template -Transform& +EIGEN_DEVICE_FUNC Transform& Transform::shear(const Scalar& sx, const Scalar& sy) { EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE) @@ -965,7 +984,7 @@ Transform::shear(const Scalar& sx, const Scalar& sy) * \sa shear() */ template -Transform& +EIGEN_DEVICE_FUNC Transform& Transform::preshear(const Scalar& sx, const Scalar& sy) { EIGEN_STATIC_ASSERT(int(Dim)==2, YOU_MADE_A_PROGRAMMING_MISTAKE) @@ -979,7 +998,7 @@ Transform::preshear(const Scalar& sx, const Scalar& sy) ******************************************************/ template -inline Transform& Transform::operator=(const TranslationType& t) +EIGEN_DEVICE_FUNC inline Transform& Transform::operator=(const TranslationType& t) { linear().setIdentity(); translation() = t.vector(); @@ -988,7 +1007,7 @@ inline Transform& Transform::o } template -inline Transform Transform::operator*(const TranslationType& t) const +EIGEN_DEVICE_FUNC inline Transform Transform::operator*(const TranslationType& t) const { Transform res = *this; res.translate(t.vector()); @@ -996,7 +1015,7 @@ inline Transform Transform::op } template -inline Transform& Transform::operator=(const UniformScaling& s) +EIGEN_DEVICE_FUNC inline Transform& Transform::operator=(const UniformScaling& s) { m_matrix.setZero(); linear().diagonal().fill(s.factor()); @@ -1006,7 +1025,7 @@ inline 
Transform& Transform::o template template -inline Transform& Transform::operator=(const RotationBase& r) +EIGEN_DEVICE_FUNC inline Transform& Transform::operator=(const RotationBase& r) { linear() = internal::toRotationMatrix(r); translation().setZero(); @@ -1016,7 +1035,7 @@ inline Transform& Transform::o template template -inline Transform Transform::operator*(const RotationBase& r) const +EIGEN_DEVICE_FUNC inline Transform Transform::operator*(const RotationBase& r) const { Transform res = *this; res.rotate(r.derived()); @@ -1035,7 +1054,7 @@ inline Transform Transform::op * \sa computeRotationScaling(), computeScalingRotation(), class SVD */ template -const typename Transform::LinearMatrixType +EIGEN_DEVICE_FUNC const typename Transform::LinearMatrixType Transform::rotation() const { LinearMatrixType result; @@ -1057,7 +1076,7 @@ Transform::rotation() const */ template template -void Transform::computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const +EIGEN_DEVICE_FUNC void Transform::computeRotationScaling(RotationMatrixType *rotation, ScalingMatrixType *scaling) const { JacobiSVD svd(linear(), ComputeFullU | ComputeFullV); @@ -1086,7 +1105,7 @@ void Transform::computeRotationScaling(RotationMatrixTy */ template template -void Transform::computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const +EIGEN_DEVICE_FUNC void Transform::computeScalingRotation(ScalingMatrixType *scaling, RotationMatrixType *rotation) const { JacobiSVD svd(linear(), ComputeFullU | ComputeFullV); @@ -1107,7 +1126,7 @@ void Transform::computeScalingRotation(ScalingMatrixTyp */ template template -Transform& +EIGEN_DEVICE_FUNC Transform& Transform::fromPositionOrientationScale(const MatrixBase &position, const OrientationType& orientation, const MatrixBase &scale) { @@ -1124,7 +1143,7 @@ template struct transform_make_affine { template - static void run(MatrixType &mat) + EIGEN_DEVICE_FUNC static void run(MatrixType &mat) { static 
const int Dim = MatrixType::ColsAtCompileTime-1; mat.template block<1,Dim>(Dim,0).setZero(); @@ -1135,21 +1154,21 @@ struct transform_make_affine template<> struct transform_make_affine { - template static void run(MatrixType &) { } + template EIGEN_DEVICE_FUNC static void run(MatrixType &) { } }; // selector needed to avoid taking the inverse of a 3x4 matrix template struct projective_transform_inverse { - static inline void run(const TransformType&, TransformType&) + EIGEN_DEVICE_FUNC static inline void run(const TransformType&, TransformType&) {} }; template struct projective_transform_inverse { - static inline void run(const TransformType& m, TransformType& res) + EIGEN_DEVICE_FUNC static inline void run(const TransformType& m, TransformType& res) { res.matrix() = m.matrix().inverse(); } @@ -1179,7 +1198,7 @@ struct projective_transform_inverse * \sa MatrixBase::inverse() */ template -Transform +EIGEN_DEVICE_FUNC Transform Transform::inverse(TransformTraits hint) const { Transform res; diff --git a/Eigen/src/Geometry/Translation.h b/Eigen/src/Geometry/Translation.h index b9b9a590c..51d9a82eb 100644 --- a/Eigen/src/Geometry/Translation.h +++ b/Eigen/src/Geometry/Translation.h @@ -51,16 +51,16 @@ protected: public: /** Default constructor without initialization. 
*/ - Translation() {} + EIGEN_DEVICE_FUNC Translation() {} /** */ - inline Translation(const Scalar& sx, const Scalar& sy) + EIGEN_DEVICE_FUNC inline Translation(const Scalar& sx, const Scalar& sy) { eigen_assert(Dim==2); m_coeffs.x() = sx; m_coeffs.y() = sy; } /** */ - inline Translation(const Scalar& sx, const Scalar& sy, const Scalar& sz) + EIGEN_DEVICE_FUNC inline Translation(const Scalar& sx, const Scalar& sy, const Scalar& sz) { eigen_assert(Dim==3); m_coeffs.x() = sx; @@ -68,48 +68,48 @@ public: m_coeffs.z() = sz; } /** Constructs and initialize the translation transformation from a vector of translation coefficients */ - explicit inline Translation(const VectorType& vector) : m_coeffs(vector) {} + EIGEN_DEVICE_FUNC explicit inline Translation(const VectorType& vector) : m_coeffs(vector) {} /** \brief Retruns the x-translation by value. **/ - inline Scalar x() const { return m_coeffs.x(); } + EIGEN_DEVICE_FUNC inline Scalar x() const { return m_coeffs.x(); } /** \brief Retruns the y-translation by value. **/ - inline Scalar y() const { return m_coeffs.y(); } + EIGEN_DEVICE_FUNC inline Scalar y() const { return m_coeffs.y(); } /** \brief Retruns the z-translation by value. **/ - inline Scalar z() const { return m_coeffs.z(); } + EIGEN_DEVICE_FUNC inline Scalar z() const { return m_coeffs.z(); } /** \brief Retruns the x-translation as a reference. **/ - inline Scalar& x() { return m_coeffs.x(); } + EIGEN_DEVICE_FUNC inline Scalar& x() { return m_coeffs.x(); } /** \brief Retruns the y-translation as a reference. **/ - inline Scalar& y() { return m_coeffs.y(); } + EIGEN_DEVICE_FUNC inline Scalar& y() { return m_coeffs.y(); } /** \brief Retruns the z-translation as a reference. 
**/ - inline Scalar& z() { return m_coeffs.z(); } + EIGEN_DEVICE_FUNC inline Scalar& z() { return m_coeffs.z(); } - const VectorType& vector() const { return m_coeffs; } - VectorType& vector() { return m_coeffs; } + EIGEN_DEVICE_FUNC const VectorType& vector() const { return m_coeffs; } + EIGEN_DEVICE_FUNC VectorType& vector() { return m_coeffs; } - const VectorType& translation() const { return m_coeffs; } - VectorType& translation() { return m_coeffs; } + EIGEN_DEVICE_FUNC const VectorType& translation() const { return m_coeffs; } + EIGEN_DEVICE_FUNC VectorType& translation() { return m_coeffs; } /** Concatenates two translation */ - inline Translation operator* (const Translation& other) const + EIGEN_DEVICE_FUNC inline Translation operator* (const Translation& other) const { return Translation(m_coeffs + other.m_coeffs); } /** Concatenates a translation and a uniform scaling */ - inline AffineTransformType operator* (const UniformScaling& other) const; + EIGEN_DEVICE_FUNC inline AffineTransformType operator* (const UniformScaling& other) const; /** Concatenates a translation and a linear transformation */ template - inline AffineTransformType operator* (const EigenBase& linear) const; + EIGEN_DEVICE_FUNC inline AffineTransformType operator* (const EigenBase& linear) const; /** Concatenates a translation and a rotation */ template - inline IsometryTransformType operator*(const RotationBase& r) const + EIGEN_DEVICE_FUNC inline IsometryTransformType operator*(const RotationBase& r) const { return *this * IsometryTransformType(r); } /** \returns the concatenation of a linear transformation \a l with the translation \a t */ // its a nightmare to define a templated friend function outside its declaration template friend - inline AffineTransformType operator*(const EigenBase& linear, const Translation& t) + EIGEN_DEVICE_FUNC inline AffineTransformType operator*(const EigenBase& linear, const Translation& t) { AffineTransformType res; res.matrix().setZero(); @@ -122,7 
+122,7 @@ public: /** Concatenates a translation and a transformation */ template - inline Transform operator* (const Transform& t) const + EIGEN_DEVICE_FUNC inline Transform operator* (const Transform& t) const { Transform res = t; res.pretranslate(m_coeffs); @@ -152,19 +152,19 @@ public: * then this function smartly returns a const reference to \c *this. */ template - inline typename internal::cast_return_type >::type cast() const + EIGEN_DEVICE_FUNC inline typename internal::cast_return_type >::type cast() const { return typename internal::cast_return_type >::type(*this); } /** Copy constructor with scalar type conversion */ template - inline explicit Translation(const Translation& other) + EIGEN_DEVICE_FUNC inline explicit Translation(const Translation& other) { m_coeffs = other.vector().template cast(); } /** \returns \c true if \c *this is approximately equal to \a other, within the precision * determined by \a prec. * * \sa MatrixBase::isApprox() */ - bool isApprox(const Translation& other, const typename NumTraits::Real& prec = NumTraits::dummy_precision()) const + EIGEN_DEVICE_FUNC bool isApprox(const Translation& other, const typename NumTraits::Real& prec = NumTraits::dummy_precision()) const { return m_coeffs.isApprox(other.m_coeffs, prec); } }; @@ -178,7 +178,7 @@ typedef Translation Translation3d; //@} template -inline typename Translation::AffineTransformType +EIGEN_DEVICE_FUNC inline typename Translation::AffineTransformType Translation::operator* (const UniformScaling& other) const { AffineTransformType res; @@ -191,7 +191,7 @@ Translation::operator* (const UniformScaling& other) const template template -inline typename Translation::AffineTransformType +EIGEN_DEVICE_FUNC inline typename Translation::AffineTransformType Translation::operator* (const EigenBase& linear) const { AffineTransformType res; -- cgit v1.2.3 From 471075f7ad8e78d4d3e400bc1124e39102515aec Mon Sep 17 00:00:00 2001 From: Robert Lukierski Date: Wed, 12 Oct 2016 18:59:05 +0100 
Subject: Fixes min() warnings. --- Eigen/src/Core/AssignEvaluator.h | 2 +- Eigen/src/Core/CwiseNullaryOp.h | 2 +- Eigen/src/Core/PlainObjectBase.h | 8 ++++---- Eigen/src/Core/TriangularMatrix.h | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 30b36be11..844b85ab3 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -555,7 +555,7 @@ struct dense_assignment_loop for(Index inner = alignedEnd; inner { m.setZero(); EIGEN_USING_STD_MATH(min) - const Index size = min(m.rows(), m.cols()); + const Index size = (min)(m.rows(), m.cols()); for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1); return m; } diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index 00313920c..a4ade63b8 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ b/Eigen/src/Core/PlainObjectBase.h @@ -917,8 +917,8 @@ struct conservative_resize_like_impl // The storage order does not allow us to use reallocation. typename Derived::PlainObject tmp(rows,cols); EIGEN_USING_STD_MATH(min) - const Index common_rows = min(rows, _this.rows()); - const Index common_cols = min(cols, _this.cols()); + const Index common_rows = (min)(rows, _this.rows()); + const Index common_cols = (min)(cols, _this.cols()); tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); _this.derived().swap(tmp); } @@ -952,8 +952,8 @@ struct conservative_resize_like_impl // The storage order does not allow us to use reallocation. 
typename Derived::PlainObject tmp(other); EIGEN_USING_STD_MATH(min) - const Index common_rows = min(tmp.rows(), _this.rows()); - const Index common_cols = min(tmp.cols(), _this.cols()); + const Index common_rows = (min)(tmp.rows(), _this.rows()); + const Index common_cols = (min)(tmp.cols(), _this.cols()); tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); _this.derived().swap(tmp); } diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index e804cf6bb..17fcfeeb9 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -646,7 +646,7 @@ bool MatrixBase::isUpperTriangular(const RealScalar& prec) const EIGEN_USING_STD_MATH(min) for(Index j = 0; j < cols(); ++j) { - Index maxi = min(j, rows()-1); + Index maxi = (min)(j, rows()-1); for(Index i = 0; i <= maxi; ++i) { RealScalar absValue = abs(coeff(i,j)); @@ -680,7 +680,7 @@ bool MatrixBase::isLowerTriangular(const RealScalar& prec) const RealScalar threshold = maxAbsOnLowerPart * prec; for(Index j = 1; j < cols(); ++j) { - Index maxi = min(j, rows()-1); + Index maxi = (min)(j, rows()-1); for(Index i = 0; i < maxi; ++i) if(abs(coeff(i, j)) > threshold) return false; } @@ -896,7 +896,7 @@ struct triangular_assignment_loop EIGEN_USING_STD_MATH(min) for(Index j = 0; j < kernel.cols(); ++j) { - Index maxi = min(j, kernel.rows()); + Index maxi = (min)(j, kernel.rows()); Index i = 0; if (((Mode&Lower) && SetOpposite) || (Mode&Upper)) { -- cgit v1.2.3 From e74612b9a02f5fd7aa202b2b7f58bbf2af2a2f7f Mon Sep 17 00:00:00 2001 From: Gael Guennebaud Date: Wed, 12 Oct 2016 22:49:47 +0200 Subject: Remove double ;; --- Eigen/src/CholmodSupport/CholmodSupport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/CholmodSupport/CholmodSupport.h b/Eigen/src/CholmodSupport/CholmodSupport.h index b8020a92c..8551ac02b 100644 --- a/Eigen/src/CholmodSupport/CholmodSupport.h +++ b/Eigen/src/CholmodSupport/CholmodSupport.h @@ 
-53,7 +53,7 @@ cholmod_sparse viewAsCholmod(SparseMatrix<_Scalar,_Options,_StorageIndex>& mat) { cholmod_sparse res; res.nzmax = mat.nonZeros(); - res.nrow = mat.rows();; + res.nrow = mat.rows(); res.ncol = mat.cols(); res.p = mat.outerIndexPtr(); res.i = mat.innerIndexPtr(); -- cgit v1.2.3 From ac63d6891ce87256de666c4d8f81eafaf99b8450 Mon Sep 17 00:00:00 2001 From: Avi Ginsburg Date: Thu, 13 Oct 2016 08:47:32 +0000 Subject: Patch to allow VS2015 & CUDA 8.0 to compile with Eigen included. I'm not sure whether to limit the check to this compiler combination (` || (EIGEN_COMP_MSVC == 1900 && __CUDACC_VER__) `) or to leave it as it is. I also don't know if this will have any affect on including Eigen in device code (I'm not in my current project). --- Eigen/src/Core/util/Macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 318ab9477..ce715716c 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -801,7 +801,7 @@ namespace Eigen { // just an empty macro ! #define EIGEN_EMPTY -#if EIGEN_COMP_MSVC_STRICT && EIGEN_COMP_MSVC < 1900 // for older MSVC versions using the base operator is sufficient (cf Bug 1000) +#if EIGEN_COMP_MSVC_STRICT && (EIGEN_COMP_MSVC < 1900 || __CUDACC_VER__) // for older MSVC versions, as well as 1900 && CUDA 8, using the base operator is sufficient (cf Bugs 1000, 1324) #define EIGEN_INHERIT_ASSIGNMENT_EQUAL_OPERATOR(Derived) \ using Base::operator =; #elif EIGEN_COMP_CLANG // workaround clang bug (see http://forum.kde.org/viewtopic.php?f=74&t=102653) -- cgit v1.2.3 From a94791b69a53504aba88f06caad596afa1ac6a34 Mon Sep 17 00:00:00 2001 From: Robert Lukierski Date: Thu, 13 Oct 2016 15:00:22 +0100 Subject: Fixes for min and abs after Benoit's comments, switched to numext. 
--- Eigen/src/Core/AssignEvaluator.h | 3 +-- Eigen/src/Core/CwiseNullaryOp.h | 3 +-- Eigen/src/Core/DiagonalMatrix.h | 3 +-- Eigen/src/Core/PlainObjectBase.h | 10 ++++------ Eigen/src/Core/TriangularMatrix.h | 19 +++++++------------ Eigen/src/Geometry/Hyperplane.h | 5 ++--- Eigen/src/Geometry/Quaternion.h | 6 ++---- 7 files changed, 18 insertions(+), 31 deletions(-) diff --git a/Eigen/src/Core/AssignEvaluator.h b/Eigen/src/Core/AssignEvaluator.h index 844b85ab3..abad8c790 100644 --- a/Eigen/src/Core/AssignEvaluator.h +++ b/Eigen/src/Core/AssignEvaluator.h @@ -517,7 +517,6 @@ struct dense_assignment_loop { EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) { - EIGEN_USING_STD_MATH(min) typedef typename Kernel::Scalar Scalar; typedef typename Kernel::PacketType PacketType; enum { @@ -555,7 +554,7 @@ struct dense_assignment_loop for(Index inner = alignedEnd; inner static EIGEN_STRONG_INLINE Derived& run(Derived& m) { m.setZero(); - EIGEN_USING_STD_MATH(min) - const Index size = (min)(m.rows(), m.cols()); + const Index size = numext::mini(m.rows(), m.cols()); for(Index i = 0; i < size; ++i) m.coeffRef(i,i) = typename Derived::Scalar(1); return m; } diff --git a/Eigen/src/Core/DiagonalMatrix.h b/Eigen/src/Core/DiagonalMatrix.h index f04facecc..c682c6d7f 100644 --- a/Eigen/src/Core/DiagonalMatrix.h +++ b/Eigen/src/Core/DiagonalMatrix.h @@ -290,12 +290,11 @@ MatrixBase::asDiagonal() const template bool MatrixBase::isDiagonal(const RealScalar& prec) const { - EIGEN_USING_STD_MATH(abs) if(cols() != rows()) return false; RealScalar maxAbsOnDiagonal = static_cast(-1); for(Index j = 0; j < cols(); ++j) { - RealScalar absOnDiagonal = abs(coeff(j,j)); + RealScalar absOnDiagonal = numext::abs(coeff(j,j)); if(absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal; } for(Index j = 0; j < cols(); ++j) diff --git a/Eigen/src/Core/PlainObjectBase.h b/Eigen/src/Core/PlainObjectBase.h index a4ade63b8..2dcd929e6 100644 --- a/Eigen/src/Core/PlainObjectBase.h +++ 
b/Eigen/src/Core/PlainObjectBase.h @@ -916,9 +916,8 @@ struct conservative_resize_like_impl { // The storage order does not allow us to use reallocation. typename Derived::PlainObject tmp(rows,cols); - EIGEN_USING_STD_MATH(min) - const Index common_rows = (min)(rows, _this.rows()); - const Index common_cols = (min)(cols, _this.cols()); + const Index common_rows = numext::mini(rows, _this.rows()); + const Index common_cols = numext::mini(cols, _this.cols()); tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); _this.derived().swap(tmp); } @@ -951,9 +950,8 @@ struct conservative_resize_like_impl { // The storage order does not allow us to use reallocation. typename Derived::PlainObject tmp(other); - EIGEN_USING_STD_MATH(min) - const Index common_rows = (min)(tmp.rows(), _this.rows()); - const Index common_cols = (min)(tmp.cols(), _this.cols()); + const Index common_rows = numext::mini(tmp.rows(), _this.rows()); + const Index common_cols = numext::mini(tmp.cols(), _this.cols()); tmp.block(0,0,common_rows,common_cols) = _this.block(0,0,common_rows,common_cols); _this.derived().swap(tmp); } diff --git a/Eigen/src/Core/TriangularMatrix.h b/Eigen/src/Core/TriangularMatrix.h index 17fcfeeb9..71f5d4f29 100644 --- a/Eigen/src/Core/TriangularMatrix.h +++ b/Eigen/src/Core/TriangularMatrix.h @@ -641,22 +641,20 @@ MatrixBase::triangularView() const template bool MatrixBase::isUpperTriangular(const RealScalar& prec) const { - EIGEN_USING_STD_MATH(abs) RealScalar maxAbsOnUpperPart = static_cast(-1); - EIGEN_USING_STD_MATH(min) for(Index j = 0; j < cols(); ++j) { - Index maxi = (min)(j, rows()-1); + Index maxi = numext::mini(j, rows()-1); for(Index i = 0; i <= maxi; ++i) { - RealScalar absValue = abs(coeff(i,j)); + RealScalar absValue = numext::abs(coeff(i,j)); if(absValue > maxAbsOnUpperPart) maxAbsOnUpperPart = absValue; } } RealScalar threshold = maxAbsOnUpperPart * prec; for(Index j = 0; j < cols(); ++j) for(Index i = j+1; i < rows(); ++i) - 
if(abs(coeff(i, j)) > threshold) return false; + if(numext::abs(coeff(i, j)) > threshold) return false; return true; } @@ -668,21 +666,19 @@ bool MatrixBase::isUpperTriangular(const RealScalar& prec) const template bool MatrixBase::isLowerTriangular(const RealScalar& prec) const { - EIGEN_USING_STD_MATH(abs) - EIGEN_USING_STD_MATH(min) RealScalar maxAbsOnLowerPart = static_cast(-1); for(Index j = 0; j < cols(); ++j) for(Index i = j; i < rows(); ++i) { - RealScalar absValue = abs(coeff(i,j)); + RealScalar absValue = numext::abs(coeff(i,j)); if(absValue > maxAbsOnLowerPart) maxAbsOnLowerPart = absValue; } RealScalar threshold = maxAbsOnLowerPart * prec; for(Index j = 1; j < cols(); ++j) { - Index maxi = (min)(j, rows()-1); + Index maxi = numext::mini(j, rows()-1); for(Index i = 0; i < maxi; ++i) - if(abs(coeff(i, j)) > threshold) return false; + if(numext::abs(coeff(i, j)) > threshold) return false; } return true; } @@ -893,10 +889,9 @@ struct triangular_assignment_loop EIGEN_DEVICE_FUNC static inline void run(Kernel &kernel) { - EIGEN_USING_STD_MATH(min) for(Index j = 0; j < kernel.cols(); ++j) { - Index maxi = (min)(j, kernel.rows()); + Index maxi = numext::mini(j, kernel.rows()); Index i = 0; if (((Mode&Lower) && SetOpposite) || (Mode&Upper)) { diff --git a/Eigen/src/Geometry/Hyperplane.h b/Eigen/src/Geometry/Hyperplane.h index d66194287..07f2659b2 100644 --- a/Eigen/src/Geometry/Hyperplane.h +++ b/Eigen/src/Geometry/Hyperplane.h @@ -145,7 +145,7 @@ public: /** \returns the absolute distance between the plane \c *this and a point \a p. * \sa signedDistance() */ - EIGEN_DEVICE_FUNC inline Scalar absDistance(const VectorType& p) const { EIGEN_USING_STD_MATH(abs) return abs(signedDistance(p)); } + EIGEN_DEVICE_FUNC inline Scalar absDistance(const VectorType& p) const { return numext::abs(signedDistance(p)); } /** \returns the projection of a point \a p onto the plane \c *this. 
*/ @@ -188,14 +188,13 @@ public: */ EIGEN_DEVICE_FUNC VectorType intersection(const Hyperplane& other) const { - EIGEN_USING_STD_MATH(abs) EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(VectorType, 2) Scalar det = coeffs().coeff(0) * other.coeffs().coeff(1) - coeffs().coeff(1) * other.coeffs().coeff(0); // since the line equations ax+by=c are normalized with a^2+b^2=1, the following tests // whether the two lines are approximately parallel. if(internal::isMuchSmallerThan(det, Scalar(1))) { // special case where the two lines are approximately parallel. Pick any point on the first line. - if(abs(coeffs().coeff(1))>abs(coeffs().coeff(0))) + if(numext::abs(coeffs().coeff(1))>numext::abs(coeffs().coeff(0))) return VectorType(coeffs().coeff(1), -coeffs().coeff(2)/coeffs().coeff(1)-coeffs().coeff(0)); else return VectorType(-coeffs().coeff(2)/coeffs().coeff(0)-coeffs().coeff(1), coeffs().coeff(0)); diff --git a/Eigen/src/Geometry/Quaternion.h b/Eigen/src/Geometry/Quaternion.h index 932f149e3..f6ef1bcf6 100644 --- a/Eigen/src/Geometry/Quaternion.h +++ b/Eigen/src/Geometry/Quaternion.h @@ -705,9 +705,8 @@ EIGEN_DEVICE_FUNC inline typename internal::traits::Scalar QuaternionBase::angularDistance(const QuaternionBase& other) const { EIGEN_USING_STD_MATH(atan2) - EIGEN_USING_STD_MATH(abs) Quaternion d = (*this) * other.conjugate(); - return Scalar(2) * atan2( d.vec().norm(), abs(d.w()) ); + return Scalar(2) * atan2( d.vec().norm(), numext::abs(d.w()) ); } @@ -725,10 +724,9 @@ QuaternionBase::slerp(const Scalar& t, const QuaternionBase::epsilon(); Scalar d = this->dot(other); - Scalar absD = abs(d); + Scalar absD = numext::abs(d); Scalar scale0; Scalar scale1; -- cgit v1.2.3 From d0ee2267d6eef6a1c5e6a69a6f7333bd4b49fbfb Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 13 Oct 2016 10:59:46 -0700 Subject: Relaxed the resizing checks so that they don't fail with gcc >= 5.3 --- unsupported/test/cxx11_tensor_simple.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) 
diff --git a/unsupported/test/cxx11_tensor_simple.cpp b/unsupported/test/cxx11_tensor_simple.cpp index fe860c970..5a0d339ef 100644 --- a/unsupported/test/cxx11_tensor_simple.cpp +++ b/unsupported/test/cxx11_tensor_simple.cpp @@ -299,22 +299,21 @@ static void test_resize() VERIFY_IS_EQUAL(epsilon.dimension(0), 2); VERIFY_IS_EQUAL(epsilon.dimension(1), 3); VERIFY_IS_EQUAL(epsilon.dimension(2), 7); - VERIFY_IS_EQUAL(epsilon.dimensions().TotalSize(), 2*3*7); + VERIFY_IS_EQUAL(epsilon.size(), 2*3*7); const int* old_data = epsilon.data(); epsilon.resize(3,2,7); VERIFY_IS_EQUAL(epsilon.dimension(0), 3); VERIFY_IS_EQUAL(epsilon.dimension(1), 2); VERIFY_IS_EQUAL(epsilon.dimension(2), 7); - VERIFY_IS_EQUAL(epsilon.dimensions().TotalSize(), 2*3*7); + VERIFY_IS_EQUAL(epsilon.size(), 2*3*7); VERIFY_IS_EQUAL(epsilon.data(), old_data); epsilon.resize(3,5,7); VERIFY_IS_EQUAL(epsilon.dimension(0), 3); VERIFY_IS_EQUAL(epsilon.dimension(1), 5); VERIFY_IS_EQUAL(epsilon.dimension(2), 7); - VERIFY_IS_EQUAL(epsilon.dimensions().TotalSize(), 3*5*7); - VERIFY_IS_NOT_EQUAL(epsilon.data(), old_data); + VERIFY_IS_EQUAL(epsilon.size(), 3*5*7); } void test_cxx11_tensor_simple() -- cgit v1.2.3 From 524fa4c46f8c5c1bc01f7754188e6883f669a543 Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Fri, 14 Oct 2016 12:09:55 +0100 Subject: Reducing the code by generalising sycl backend functions/structs. 
--- Eigen/Core | 65 +-- .../Eigen/CXX11/src/Tensor/TensorReduction.h | 50 +- unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h | 27 +- .../Tensor/TensorSyclConvertToDeviceExpression.h | 237 ++------- .../CXX11/src/Tensor/TensorSyclExprConstructor.h | 578 ++++++--------------- .../CXX11/src/Tensor/TensorSyclExtractAccessor.h | 433 +++------------ .../CXX11/src/Tensor/TensorSyclExtractFunctors.h | 297 +++-------- .../Eigen/CXX11/src/Tensor/TensorSyclLeafCount.h | 155 ++---- .../Eigen/CXX11/src/Tensor/TensorSyclPlaceHolder.h | 142 ++--- .../CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h | 293 +++-------- unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h | 43 +- .../Eigen/CXX11/src/Tensor/TensorSyclTuple.h | 168 +++--- unsupported/test/CMakeLists.txt | 14 +- unsupported/test/cxx11_tensor_broadcast_sycl.cpp | 4 +- unsupported/test/cxx11_tensor_device_sycl.cpp | 6 +- unsupported/test/cxx11_tensor_forced_eval_sycl.cpp | 6 +- unsupported/test/cxx11_tensor_sycl.cpp | 92 ++-- 17 files changed, 727 insertions(+), 1883 deletions(-) diff --git a/Eigen/Core b/Eigen/Core index ed007dfa9..53cb72c95 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -11,46 +11,8 @@ #ifndef EIGEN_CORE_H #define EIGEN_CORE_H -// first thing Eigen does: stop the compiler from committing suicide -#include "src/Core/util/DisableStupidWarnings.h" - -// Handle NVCC/CUDA -#ifdef __CUDACC__ - // Do not try asserts on CUDA! - #ifndef EIGEN_NO_DEBUG - #define EIGEN_NO_DEBUG - #endif - - #ifdef EIGEN_INTERNAL_DEBUGGING - #undef EIGEN_INTERNAL_DEBUGGING - #endif - - // Do not try to vectorize on CUDA! - #ifndef EIGEN_DONT_VECTORIZE - #define EIGEN_DONT_VECTORIZE - #endif - - #ifdef EIGEN_EXCEPTIONS - #undef EIGEN_EXCEPTIONS - #endif - - // All functions callable from CUDA code must be qualified with __device__ - #define EIGEN_DEVICE_FUNC __host__ __device__ - -#else - #define EIGEN_DEVICE_FUNC - -#endif - -// When compiling CUDA device code with NVCC, pull in math functions from the -// global namespace. 
In host mode, and when device doee with clang, use the -// std versions. -#if defined(__CUDA_ARCH__) && defined(__NVCC__) - #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC; -#else - #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC; -#endif +/// This will no longer be needed after the next release of the computecppCE #ifdef EIGEN_USE_SYCL #undef min #undef max @@ -60,9 +22,9 @@ #include #endif -// We need these predefines to determine if asserts need to be disabled for the device compiler -#if defined(__SYCL_DEVICE_ONLY__) - // Do not try asserts on SYCL! +// Handle NVCC/CUDA/SYCL +#if defined(__CUDACC__) || defined(__SYCL_DEVICE_ONLY__) + // Do not try asserts on CUDA and SYCL! #ifndef EIGEN_NO_DEBUG #define EIGEN_NO_DEBUG #endif @@ -71,7 +33,7 @@ #undef EIGEN_INTERNAL_DEBUGGING #endif - // Do not try to vectorize on SYCL! + // Do not try to vectorize on CUDA and SYCL! #ifndef EIGEN_DONT_VECTORIZE #define EIGEN_DONT_VECTORIZE #endif @@ -80,12 +42,25 @@ #undef EIGEN_EXCEPTIONS #endif + // All functions callable from CUDA code must be qualified with __device__ + #ifdef __CUDACC__ + #define EIGEN_DEVICE_FUNC __host__ __device__ + // For Sycl we dont need that + #else + #define EIGEN_DEVICE_FUNC + #endif +#else #define EIGEN_DEVICE_FUNC #endif -#if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(EIGEN_EXCEPTIONS) && !defined(EIGEN_USE_SYCL) - #define EIGEN_EXCEPTIONS +// When compiling CUDA device code with NVCC, pull in math functions from the +// global namespace. In host mode, and when device doee with clang, use the +// std versions. 
+#if defined(__CUDA_ARCH__) && defined(__NVCC__) + #define EIGEN_USING_STD_MATH(FUNC) using ::FUNC; +#else + #define EIGEN_USING_STD_MATH(FUNC) using std::FUNC; #endif #ifdef EIGEN_EXCEPTIONS diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h index a87777b22..2cbcc87c6 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReduction.h @@ -423,15 +423,15 @@ struct TensorEvaluator, Device> // Precompute output strides. if (NumOutputDims > 0) { if (static_cast(Layout) == static_cast(ColMajor)) { - m_outputStrides[0] = 1; - for (int i = 1; i < NumOutputDims; ++i) { - m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; - } + m_outputStrides[0] = 1; + for (int i = 1; i < NumOutputDims; ++i) { + m_outputStrides[i] = m_outputStrides[i - 1] * m_dimensions[i - 1]; + } } else { - m_outputStrides.back() = 1; - for (int i = NumOutputDims - 2; i >= 0; --i) { - m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; - } + m_outputStrides.back() = 1; + for (int i = NumOutputDims - 2; i >= 0; --i) { + m_outputStrides[i] = m_outputStrides[i + 1] * m_dimensions[i + 1]; + } } } @@ -439,27 +439,27 @@ struct TensorEvaluator, Device> if (NumInputDims > 0) { array input_strides; if (static_cast(Layout) == static_cast(ColMajor)) { - input_strides[0] = 1; - for (int i = 1; i < NumInputDims; ++i) { - input_strides[i] = input_strides[i-1] * input_dims[i-1]; - } + input_strides[0] = 1; + for (int i = 1; i < NumInputDims; ++i) { + input_strides[i] = input_strides[i-1] * input_dims[i-1]; + } } else { - input_strides.back() = 1; - for (int i = NumInputDims - 2; i >= 0; --i) { - input_strides[i] = input_strides[i + 1] * input_dims[i + 1]; - } + input_strides.back() = 1; + for (int i = NumInputDims - 2; i >= 0; --i) { + input_strides[i] = input_strides[i + 1] * input_dims[i + 1]; + } } int outputIndex = 0; int reduceIndex = 0; for (int i = 0; i < 
NumInputDims; ++i) { - if (m_reduced[i]) { - m_reducedStrides[reduceIndex] = input_strides[i]; - ++reduceIndex; - } else { - m_preservedStrides[outputIndex] = input_strides[i]; - ++outputIndex; - } + if (m_reduced[i]) { + m_reducedStrides[reduceIndex] = input_strides[i]; + ++reduceIndex; + } else { + m_preservedStrides[outputIndex] = input_strides[i]; + ++outputIndex; + } } } @@ -578,7 +578,7 @@ struct TensorEvaluator, Device> Op reducer(m_reducer); if (ReducingInnerMostDims || RunningFullReduction) { const Index num_values_to_reduce = - (static_cast(Layout) == static_cast(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; + (static_cast(Layout) == static_cast(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; return internal::InnerMostDimReducer::reduce(*this, firstInput(index), num_values_to_reduce, reducer); } else { @@ -602,7 +602,7 @@ struct TensorEvaluator, Device> EIGEN_ALIGN_MAX typename internal::remove_const::type values[PacketSize]; if (ReducingInnerMostDims) { const Index num_values_to_reduce = - (static_cast(Layout) == static_cast(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; + (static_cast(Layout) == static_cast(ColMajor)) ? m_preservedStrides[0] : m_preservedStrides[NumPreservedStrides - 1]; const Index firstIndex = firstInput(index); for (Index i = 0; i < PacketSize; ++i) { Op reducer(m_reducer); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h index 277dd739c..da15f7942 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSycl.h @@ -11,20 +11,35 @@ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
// General include header of SYCL target for Tensor Module -#ifndef TENSORSYCL_H -#define TENSORSYCL_H +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H #ifdef EIGEN_USE_SYCL -// trait class to extract different attribute contents -template -struct Trait; // global pointer to set different attribute state for a class template struct MakeGlobalPointer { typedef typename cl::sycl::global_ptr::pointer_t Type; }; +namespace Eigen { +namespace TensorSycl { +namespace internal { + +/// This struct is used for special expression nodes with no operations (for example assign and selectOP). + struct NoOP; + +template struct GetType{ + typedef const T Type; +}; +template struct GetType{ + typedef T Type; +}; + +} +} +} + // tuple construction #include "TensorSyclTuple.h" @@ -59,4 +74,4 @@ struct MakeGlobalPointer { #include "TensorSyclRun.h" #endif // end of EIGEN_USE_SYCL -#endif // TENSORSYCL_H +#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_H diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h index b3748131b..a94c30426 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h @@ -19,12 +19,13 @@ * *****************************************************************/ -#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP -#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_CONVERT_TO_DEVICE_EXPRESSION_HPP namespace Eigen { namespace TensorSycl { namespace internal { + /// \struct ConvertToDeviceExpression /// \brief This struct is used to convert the MakePointer in the host 
expression /// to the MakeGlobalPointer for the device expression. For the leafNodes @@ -33,204 +34,74 @@ namespace internal { template struct ConvertToDeviceExpression; -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is TensorMap -template class MakePointer_> -struct ConvertToDeviceExpression< - TensorMap, Options2_, - MakePointer_>> { - using Type = TensorMap, - Options2_, MakeGlobalPointer>; +template class NonOpCategory, bool IsConst, typename... Args> +struct NonOpConversion{ + typedef typename GetType::Type...> >::Type Type; }; -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is const TensorMap -template class MakePointer_> -struct ConvertToDeviceExpression< - const TensorMap, - Options2_, MakePointer_>> { - using Type = - const TensorMap, - Options2_, MakeGlobalPointer>; -}; -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is const TensorCwiseNullaryOp -template -struct ConvertToDeviceExpression> { - using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; - using Type = const TensorCwiseNullaryOp; +template class > class NonOpCategory, bool IsConst, typename Args> +struct DeviceConvertor{ + typedef typename GetType::Type, MakeGlobalPointer> >::Type Type; }; /// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is TensorCwiseNullaryOp -template -struct ConvertToDeviceExpression> { - using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; - using Type = TensorCwiseNullaryOp; -}; - -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is const TensorBroadcastingOp -template -struct ConvertToDeviceExpression> { - using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; - using Type = const TensorBroadcastingOp; -}; - -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is TensorBroadcastingOp -template 
-struct ConvertToDeviceExpression> { - using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; - using Type = TensorBroadcastingOp; -}; - -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is const TensorCwiseUnaryOp -template -struct ConvertToDeviceExpression> { - using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; - using Type = const TensorCwiseUnaryOp; -}; - -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is TensorCwiseUnaryOp -template -struct ConvertToDeviceExpression> { - using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; - using Type = TensorCwiseUnaryOp; -}; - -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is const TensorCwiseBinaryOp -template -struct ConvertToDeviceExpression< - const TensorCwiseBinaryOp> { - using LHSPlaceHolderType = typename ConvertToDeviceExpression::Type; - using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; - using Type = - const TensorCwiseBinaryOp; -}; - -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is TensorCwiseBinaryOp -template -struct ConvertToDeviceExpression> { - using LHSPlaceHolderType = typename ConvertToDeviceExpression::Type; - using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; - using Type = TensorCwiseBinaryOp; +/// type is TensorMap +#define TENSORMAPCONVERT(CVQual)\ +template class MakePointer_>\ +struct ConvertToDeviceExpression, Options2_, MakePointer_> > {\ + typedef CVQual TensorMap, Options2_, MakeGlobalPointer> Type;\ }; -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is const TensorCwiseTernaryOp -template -struct ConvertToDeviceExpression< - const TensorCwiseTernaryOp> { - using Arg1PlaceHolderType = - typename ConvertToDeviceExpression::Type; - using Arg2PlaceHolderType = - typename ConvertToDeviceExpression::Type; - using 
Arg3PlaceHolderType = - typename ConvertToDeviceExpression::Type; - using Type = - const TensorCwiseTernaryOp; -}; +TENSORMAPCONVERT(const) +TENSORMAPCONVERT() +#undef TENSORMAPCONVERT /// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is TensorCwiseTernaryOp -template -struct ConvertToDeviceExpression< - TensorCwiseTernaryOp> { - using Arg1PlaceHolderType = - typename ConvertToDeviceExpression::Type; - using Arg2PlaceHolderType = - typename ConvertToDeviceExpression::Type; - using Arg3PlaceHolderType = - typename ConvertToDeviceExpression::Type; - using Type = TensorCwiseTernaryOp; +/// type is TensorCwiseNullaryOp, TensorCwiseUnaryOp, TensorCwiseBinaryOp, TensorCwiseTernaryOp, TensorBroadcastingOp +#define CATEGORYCONVERT(CVQual)\ +template class Category, typename OP, typename... subExprs>\ +struct ConvertToDeviceExpression > {\ + typedef CVQual Category::Type... > Type;\ }; +CATEGORYCONVERT(const) +CATEGORYCONVERT() +#undef CATEGORYCONVERT -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is const TensorCwiseSelectOp -template -struct ConvertToDeviceExpression< - const TensorSelectOp> { - using IfPlaceHolderType = typename ConvertToDeviceExpression::Type; - using ThenPlaceHolderType = - typename ConvertToDeviceExpression::Type; - using ElsePlaceHolderType = - typename ConvertToDeviceExpression::Type; - using Type = const TensorSelectOp; -}; /// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is TensorCwiseSelectOp -template -struct ConvertToDeviceExpression> { - using IfPlaceHolderType = typename ConvertToDeviceExpression::Type; - using ThenPlaceHolderType = - typename ConvertToDeviceExpression::Type; - using ElsePlaceHolderType = - typename ConvertToDeviceExpression::Type; - using Type = TensorSelectOp; -}; +/// type is TensorCwiseSelectOp +#define SELECTOPCONVERT(CVQual, Res)\ +template \ +struct ConvertToDeviceExpression >\ +: NonOpConversion {}; 
+SELECTOPCONVERT(const, true) +SELECTOPCONVERT(, false) +#undef SELECTOPCONVERT /// specialisation of the \ref ConvertToDeviceExpression struct when the node /// type is const AssingOP -template -struct ConvertToDeviceExpression> { - using LHSPlaceHolderType = typename ConvertToDeviceExpression::Type; - using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; - using Type = const TensorAssignOp; -}; - -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is AssingOP -template -struct ConvertToDeviceExpression> { - using LHSPlaceHolderType = typename ConvertToDeviceExpression::Type; - using RHSPlaceHolderType = typename ConvertToDeviceExpression::Type; - using Type = TensorAssignOp; -}; - -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is const TensorForcedEvalOp -template -struct ConvertToDeviceExpression> { - using PlaceHolderType = typename ConvertToDeviceExpression::Type; - using Type = const TensorForcedEvalOp; -}; - -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is const TensorForcedEvalOp -template -struct ConvertToDeviceExpression> { - using PlaceHolderType = typename ConvertToDeviceExpression::Type; - using Type = TensorForcedEvalOp; -}; - -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is const TensorEvalToOp -template -struct ConvertToDeviceExpression> { - using PlaceHolderType = typename ConvertToDeviceExpression::Type; - using Type = const TensorEvalToOp; -}; - -/// specialisation of the \ref ConvertToDeviceExpression struct when the node -/// type is TensorEvalToOp -template -struct ConvertToDeviceExpression> { - using PlaceHolderType = typename ConvertToDeviceExpression::Type; - using Type = TensorEvalToOp; -}; +#define ASSIGNCONVERT(CVQual, Res)\ +template \ +struct ConvertToDeviceExpression >\ +: NonOpConversion{}; + +ASSIGNCONVERT(const, true) +ASSIGNCONVERT(, false) +#undef 
ASSIGNCONVERT + +/// specialisation of the \ref ConvertToDeviceExpression struct when the node +/// type is either TensorForcedEvalOp or TensorEvalToOp +#define KERNELBROKERCONVERT(CVQual, Res, ExprNode)\ +template \ +struct ConvertToDeviceExpression > \ +: DeviceConvertor{}; + +KERNELBROKERCONVERT(const, true, TensorForcedEvalOp) +KERNELBROKERCONVERT(, false, TensorForcedEvalOp) +KERNELBROKERCONVERT(const, true, TensorEvalToOp) +KERNELBROKERCONVERT(, false, TensorEvalToOp) +#undef KERNELBROKERCONVERT } // namespace internal } // namespace TensorSycl } // namespace Eigen diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h index fe3994175..833d5e271 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h @@ -19,8 +19,8 @@ * *****************************************************************/ -#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXPR_CONSTRUCTOR_HPP -#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSORYSYCL_EXPR_CONSTRUCTOR_HPP +#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP +#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_EXPR_CONSTRUCTOR_HPP namespace Eigen { namespace TensorSycl { @@ -30,8 +30,7 @@ namespace internal { template struct EvalToLHSConstructor { PtrType expr; - EvalToLHSConstructor(const utility::tuple::Tuple &t) - : expr((&(*(utility::tuple::get(t).get_pointer())))) {} + EvalToLHSConstructor(const utility::tuple::Tuple &t): expr((&(*(utility::tuple::get(t).get_pointer())))) {} }; /// \struct ExprConstructor is used to reconstruct the expression on the device @@ -44,447 +43,166 @@ struct EvalToLHSConstructor { template struct ExprConstructor; -/// specialisation of the \ref ExprConstructor struct when the node type is -/// const TensorMap -template class MakePointer_, size_t N, typename... 
Params> -struct ExprConstructor< - const TensorMap, - Options2_, MakeGlobalPointer>, - const Eigen::internal::PlaceHolder< - const TensorMap, - Options3_, MakePointer_>, - N>, - Params...> { - using Type = - const TensorMap, - Options2_, MakeGlobalPointer>; - - Type expr; - - template - ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple &t) - : expr(Type((&(*(utility::tuple::get(t).get_pointer()))), - fd.dimensions())) {} -}; - /// specialisation of the \ref ExprConstructor struct when the node type is /// TensorMap -template class MakePointer_, size_t N, typename... Params> -struct ExprConstructor< - TensorMap, Options2_, - MakeGlobalPointer>, - Eigen::internal::PlaceHolder< - TensorMap, Options3_, - MakePointer_>, - N>, - Params...> { - using Type = TensorMap, - Options2_, MakeGlobalPointer>; - - Type expr; - template - ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple &t) - : expr(Type((&(*(utility::tuple::get(t).get_pointer()))), - fd.dimensions())) {} -}; - -/// specialisation of the \ref ExprConstructor struct when the node type is -/// TensorCwiseNullaryOp -template -struct ExprConstructor, - TensorCwiseNullaryOp, Params...> { - using my_type = ExprConstructor; - my_type rhsExpr; - using Type = TensorCwiseNullaryOp; - Type expr; - - template - ExprConstructor(FuncDetector &funcD, - const utility::tuple::Tuple &t) - : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {} -}; - -/// specialisation of the \ref ExprConstructor struct when the node type is -/// const TensorCwiseNullaryOp -template -struct ExprConstructor, - const TensorCwiseNullaryOp, Params...> { - using my_type = const ExprConstructor; - my_type rhsExpr; - using Type = const TensorCwiseNullaryOp; - Type expr; - - template - ExprConstructor(FuncDetector &funcD, - const utility::tuple::Tuple &t) - : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {} -}; - -/// specialisation of the \ref ExprConstructor struct when the node type is -/// TensorBroadcastingOp 
-template -struct ExprConstructor, - TensorBroadcastingOp, Params...> { - using my_type = ExprConstructor; - my_type rhsExpr; - using Type = TensorBroadcastingOp; - Type expr; - - template - ExprConstructor(FuncDetector &funcD, - const utility::tuple::Tuple &t) - : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {} -}; - -/// specialisation of the \ref ExprConstructor struct when the node type is -/// const TensorBroadcastingOp -template -struct ExprConstructor, - const TensorBroadcastingOp, Params...> { - using my_type = const ExprConstructor; - my_type rhsExpr; - using Type = const TensorBroadcastingOp; - Type expr; - - template - ExprConstructor(FuncDetector &funcD, - const utility::tuple::Tuple &t) - : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {} -}; - -/// specialisation of the \ref ExprConstructor struct when the node type is -/// TensorCwiseUnaryOp -template -struct ExprConstructor, - TensorCwiseUnaryOp, Params...> { - using my_type = ExprConstructor; - using Type = TensorCwiseUnaryOp; - my_type rhsExpr; - Type expr; - - template - ExprConstructor(FuncDetector &funcD, utility::tuple::Tuple &t) - : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {} -}; - -/// specialisation of the \ref ExprConstructor struct when the node type is -/// const TensorCwiseUnaryOp -template -struct ExprConstructor, - const TensorCwiseUnaryOp, Params...> { - using my_type = ExprConstructor; - using Type = const TensorCwiseUnaryOp; - my_type rhsExpr; - Type expr; - - template - ExprConstructor(FuncDetector &funcD, - const utility::tuple::Tuple &t) - : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {} -}; - -/// specialisation of the \ref ExprConstructor struct when the node type is -/// TensorCwiseBinaryOp -template -struct ExprConstructor, - TensorCwiseBinaryOp, Params...> { - using my_left_type = ExprConstructor; - using my_right_type = ExprConstructor; - using Type = TensorCwiseBinaryOp; - - my_left_type lhsExpr; - my_right_type 
rhsExpr; - Type expr; - - template - ExprConstructor(FuncDetector &funcD, - const utility::tuple::Tuple &t) - : lhsExpr(funcD.lhsExpr, t), - rhsExpr(funcD.rhsExpr, t), - expr(lhsExpr.expr, rhsExpr.expr, funcD.func) {} -}; - -/// specialisation of the \ref ExprConstructor struct when the node type is -/// const TensorCwiseBinaryOp -template -struct ExprConstructor, - const TensorCwiseBinaryOp, - Params...> { - using my_left_type = ExprConstructor; - using my_right_type = ExprConstructor; - using Type = const TensorCwiseBinaryOp; - - my_left_type lhsExpr; - my_right_type rhsExpr; - Type expr; - - template - ExprConstructor(FuncDetector &funcD, - const utility::tuple::Tuple &t) - : lhsExpr(funcD.lhsExpr, t), - rhsExpr(funcD.rhsExpr, t), - expr(lhsExpr.expr, rhsExpr.expr, funcD.func) {} -}; - -/// specialisation of the \ref ExprConstructor struct when the node type is -/// const TensorCwiseTernaryOp -template -struct ExprConstructor< - const TensorCwiseTernaryOp, - const TensorCwiseTernaryOp, Params...> { - using my_arg1_type = ExprConstructor; - using my_arg2_type = ExprConstructor; - using my_arg3_type = ExprConstructor; - using Type = const TensorCwiseTernaryOp; - - my_arg1_type arg1Expr; - my_arg2_type arg2Expr; - my_arg3_type arg3Expr; - Type expr; - template - ExprConstructor(FuncDetector &funcD, - const utility::tuple::Tuple &t) - : arg1Expr(funcD.arg1Expr, t), - arg2Expr(funcD.arg2Expr, t), - arg3Expr(funcD.arg3Expr, t), - expr(arg1Expr.expr, arg2Expr.expr, arg3Expr.expr, funcD.func) {} -}; +#define TENSORMAP(CVQual)\ +template class MakePointer_, size_t N, typename... 
Params>\ +struct ExprConstructor< CVQual TensorMap, Options2_, MakeGlobalPointer>,\ +CVQual Eigen::internal::PlaceHolder, Options3_, MakePointer_>, N>, Params...>{\ + typedef CVQual TensorMap, Options2_, MakeGlobalPointer> Type;\ + Type expr;\ + template \ + ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple &t)\ + : expr(Type((&(*(utility::tuple::get(t).get_pointer()))), fd.dimensions())) {}\ +}; + +TENSORMAP(const) +TENSORMAP() +#undef TENSORMAP + +#define UNARYCATEGORY(CVQual)\ +template class UnaryCategory, typename OP, typename OrigRHSExpr, typename RHSExpr, typename... Params>\ +struct ExprConstructor, CVQual UnaryCategory, Params...> {\ + typedef ExprConstructor my_type;\ + my_type rhsExpr;\ + typedef CVQual UnaryCategory Type;\ + Type expr;\ + template \ + ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple &t)\ + : rhsExpr(funcD.rhsExpr, t), expr(rhsExpr.expr, funcD.func) {}\ +}; + +UNARYCATEGORY(const) +UNARYCATEGORY() +#undef UNARYCATEGORY + +/// specialisation of the \ref ExprConstructor struct when the node type is +/// TensorBinaryOp +#define BINARYCATEGORY(CVQual)\ +template class BinaryCategory, typename OP, typename OrigLHSExpr, typename OrigRHSExpr, typename LHSExpr,\ +typename RHSExpr, typename... 
Params>\ +struct ExprConstructor, CVQual BinaryCategory, Params...> {\ + typedef ExprConstructor my_left_type;\ + typedef ExprConstructor my_right_type;\ + typedef CVQual BinaryCategory Type;\ + my_left_type lhsExpr;\ + my_right_type rhsExpr;\ + Type expr;\ + template \ + ExprConstructor(FuncDetector &funcD, const utility::tuple::Tuple &t)\ + : lhsExpr(funcD.lhsExpr, t),rhsExpr(funcD.rhsExpr, t), expr(lhsExpr.expr, rhsExpr.expr, funcD.func) {}\ +}; + +BINARYCATEGORY(const) +BINARYCATEGORY() +#undef BINARYCATEGORY /// specialisation of the \ref ExprConstructor struct when the node type is /// TensorCwiseTernaryOp -template -struct ExprConstructor< - TensorCwiseTernaryOp, - TensorCwiseTernaryOp, Params...> { - using my_arg1_type = ExprConstructor; - using my_arg2_type = ExprConstructor; - using my_arg3_type = ExprConstructor; - using Type = TensorCwiseTernaryOp; - - my_arg1_type arg1Expr; - my_arg2_type arg2Expr; - my_arg3_type arg3Expr; - Type expr; - template - ExprConstructor(FuncDetector &funcD, - const utility::tuple::Tuple &t) - : arg1Expr(funcD.arg1Expr, t), - arg2Expr(funcD.arg2Expr, t), - arg3Expr(funcD.arg3Expr, t), - expr(arg1Expr.expr, arg2Expr.expr, arg3Expr.expr, funcD.func) {} -}; - -/// specialisation of the \ref ExprConstructor struct when the node type is -/// const TensorCwiseSelectOp -template -struct ExprConstructor< - const TensorSelectOp, - const TensorSelectOp, Params...> { - using my_if_type = ExprConstructor; - using my_then_type = ExprConstructor; - using my_else_type = ExprConstructor; - using Type = const TensorSelectOp; - - my_if_type ifExpr; - my_then_type thenExpr; - my_else_type elseExpr; - Type expr; - template - ExprConstructor(FuncDetector &funcD, - const utility::tuple::Tuple &t) - : ifExpr(funcD.ifExpr, t), - thenExpr(funcD.thenExpr, t), - elseExpr(funcD.elseExpr, t), - expr(ifExpr.expr, thenExpr.expr, elseExpr.expr) {} -}; +#define TERNARYCATEGORY(CVQual)\ +template