From 16a56b2dddbfaf2d4b81d62be5e3139f12783ac8 Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Thu, 27 Jun 2019 12:25:09 +0100 Subject: [SYCL] This PR adds the minimum modifications to Eigen core required to run Eigen unsupported modules on devices supporting SYCL. * Adding SYCL memory model * Enabling/Disabling SYCL backend in Core * Supporting Vectorization --- Eigen/Core | 17 +- Eigen/src/Core/MathFunctions.h | 100 +++--- Eigen/src/Core/arch/GPU/Half.h | 2 +- Eigen/src/Core/arch/SYCL/InteropHeaders.h | 233 +++++++++---- Eigen/src/Core/arch/SYCL/MathFunctions.h | 207 +++++++----- Eigen/src/Core/arch/SYCL/PacketMath.h | 537 ++++++++++++++++++------------ Eigen/src/Core/arch/SYCL/TypeCasting.h | 64 ++-- Eigen/src/Core/functors/UnaryFunctors.h | 6 +- Eigen/src/Core/util/Macros.h | 33 +- test/main.h | 6 +- 10 files changed, 753 insertions(+), 452 deletions(-) diff --git a/Eigen/Core b/Eigen/Core index 759b1bb80..af741a241 100644 --- a/Eigen/Core +++ b/Eigen/Core @@ -101,13 +101,23 @@ #include #endif -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(EIGEN_USE_SYCL) #undef min #undef max #undef isnan #undef isinf #undef isfinite #include + #include + #include + #include + #include + #ifndef EIGEN_SYCL_LOCAL_THREAD_DIM0 + #define EIGEN_SYCL_LOCAL_THREAD_DIM0 16 + #endif + #ifndef EIGEN_SYCL_LOCAL_THREAD_DIM1 + #define EIGEN_SYCL_LOCAL_THREAD_DIM1 16 + #endif #endif @@ -207,12 +217,15 @@ using std::ptrdiff_t; #include "src/Core/arch/GPU/MathFunctions.h" #endif -#if defined EIGEN_VECTORIZE_SYCL +#if defined(EIGEN_USE_SYCL) + #include "src/Core/arch/SYCL/SyclMemoryModel.h" #include "src/Core/arch/SYCL/InteropHeaders.h" +#if !defined(EIGEN_DONT_VECTORIZE_SYCL) #include "src/Core/arch/SYCL/PacketMath.h" #include "src/Core/arch/SYCL/MathFunctions.h" #include "src/Core/arch/SYCL/TypeCasting.h" #endif +#endif #include "src/Core/arch/Default/Settings.h" #include "src/Core/functors/TernaryFunctors.h" diff --git a/Eigen/src/Core/MathFunctions.h b/Eigen/src/Core/MathFunctions.h index 34dd15d85..685042dc0 100644 --- a/Eigen/src/Core/MathFunctions.h +++ b/Eigen/src/Core/MathFunctions.h @@ -954,7 +954,7 @@ EIGEN_ALWAYS_INLINE long double maxi(const long double& x, const long double& y) } #endif -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) #define SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) \ @@ -991,7 +991,7 @@ EIGEN_ALWAYS_INLINE long double maxi(const long double& x, const long double& y) SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC,cl::sycl::cl_double) #define SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(NAME, FUNC, RET_TYPE) \ SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, cl::sycl::cl_float) \ - SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, cl::sycl::cl_double) + SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, cl::sycl::cl_double) #define SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE) \ template<> \ @@ -1021,7 +1021,7 @@ SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(mini, fmin) SYCL_SPECIALIZE_INTEGER_TYPES_BINARY(maxi, max) SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(maxi, fmax) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif template @@ -1104,9 +1104,9 @@ inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y); } -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(hypot, hypot) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif template EIGEN_DEVICE_FUNC @@ -1115,9 +1115,9 @@ inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); } -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(log1p, log1p) -#endif //defined(__SYCL_DEVICE_ONLY__) +#endif #if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1134,19 +1134,19 @@ inline typename internal::pow_impl::result_type pow(const Scala return internal::pow_impl::run(x, y); } -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(pow, pow) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif template EIGEN_DEVICE_FUNC bool (isnan) (const T &x) { return internal::isnan_impl(x); } template EIGEN_DEVICE_FUNC bool (isinf) (const T &x) { return internal::isinf_impl(x); } template EIGEN_DEVICE_FUNC bool (isfinite)(const T &x) { return internal::isfinite_impl(x); } -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isnan, isnan, bool) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isinf, isinf, bool) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isfinite, isfinite, bool) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif template EIGEN_DEVICE_FUNC @@ -1155,9 +1155,9 @@ inline EIGEN_MATHFUNC_RETVAL(round, Scalar) round(const Scalar& x) return EIGEN_MATHFUNC_IMPL(round, Scalar)::run(x); } -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(round, round) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif template EIGEN_DEVICE_FUNC @@ -1167,9 +1167,9 @@ T (floor)(const T& x) return floor(x); } -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(floor, floor) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif #if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1187,9 +1187,9 @@ T (ceil)(const T& x) return ceil(x); } -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(ceil, ceil) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif #if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1232,9 +1232,9 @@ T sqrt(const T &x) return sqrt(x); } -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sqrt, sqrt) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1243,9 +1243,9 @@ T log(const T &x) { return log(x); } -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(log, log) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif #if defined(EIGEN_GPUCC) @@ -1271,10 +1271,10 @@ abs(const T &x) { return x; } -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_INTEGER_TYPES_UNARY(abs, abs) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(abs, fabs) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif #if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1301,9 +1301,9 @@ T exp(const T &x) { return exp(x); } -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(exp, exp) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif #if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1336,9 +1336,9 @@ inline EIGEN_MATHFUNC_RETVAL(expm1, Scalar) expm1(const Scalar& x) return EIGEN_MATHFUNC_IMPL(expm1, Scalar)::run(x); } -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(expm1, expm1) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif #if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1355,9 +1355,9 @@ T cos(const T &x) { return cos(x); } -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(cos,cos) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif #if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1374,9 +1374,9 @@ T sin(const T &x) { return sin(x); } -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sin, sin) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif #if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1393,9 +1393,9 @@ T tan(const T &x) { return tan(x); } -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(tan, tan) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif #if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1421,10 +1421,10 @@ T acosh(const T &x) { } #endif -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acos, acos) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acosh, acosh) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif #if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1450,10 +1450,10 @@ T asinh(const T &x) { } #endif -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asin, asin) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asinh, asinh) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif #if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1479,10 +1479,10 @@ T atanh(const T &x) { } #endif -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atan, atan) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atanh, atanh) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif #if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1500,9 +1500,9 @@ T cosh(const T &x) { return cosh(x); } -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(cosh, cosh) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif #if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1519,9 +1519,9 @@ T sinh(const T &x) { return sinh(x); } -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sinh, sinh) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif #if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1538,14 +1538,14 @@ T tanh(const T &x) { return tanh(x); } -#if (!defined(EIGEN_GPUCC)) && EIGEN_FAST_MATH && (!defined(__SYCL_DEVICE_ONLY__)) +#if (!defined(EIGEN_GPUCC)) && EIGEN_FAST_MATH && !defined(SYCL_DEVICE_ONLY) EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(float x) { return internal::generic_fast_tanh_float(x); } #endif -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(tanh, tanh) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif #if defined(EIGEN_GPUCC) template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE @@ -1562,9 +1562,9 @@ T fmod(const T& a, const T& b) { return fmod(a, b); } -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(fmod, fmod) -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif #if defined(EIGEN_GPUCC) template <> @@ -1580,7 +1580,7 @@ double fmod(const double& a, const double& b) { } #endif -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) #undef SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY #undef SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_UNARY #undef SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_BINARY @@ -1595,7 +1595,7 @@ double fmod(const double& a, const double& b) { #undef SYCL_SPECIALIZE_GEN1_BINARY_FUNC #undef SYCL_SPECIALIZE_GEN2_BINARY_FUNC #undef SYCL_SPECIALIZE_BINARY_FUNC -#endif // defined(__SYCL_DEVICE_ONLY__) +#endif } // end namespace numext diff --git a/Eigen/src/Core/arch/GPU/Half.h b/Eigen/src/Core/arch/GPU/Half.h index 6869354b1..655dc20d5 100644 --- a/Eigen/src/Core/arch/GPU/Half.h +++ b/Eigen/src/Core/arch/GPU/Half.h @@ -65,7 +65,7 @@ struct __half_raw { typedef __half __half_raw; #endif // defined(EIGEN_HAS_CUDA_FP16) -#elif defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__) +#elif defined(SYCL_DEVICE_ONLY) typedef cl::sycl::half __half_raw; #endif diff --git a/Eigen/src/Core/arch/SYCL/InteropHeaders.h b/Eigen/src/Core/arch/SYCL/InteropHeaders.h index b09d45ea1..ef66fc7de 100644 --- a/Eigen/src/Core/arch/SYCL/InteropHeaders.h +++ b/Eigen/src/Core/arch/SYCL/InteropHeaders.h @@ -16,58 +16,67 @@ * \brief: * InteropHeaders * -*****************************************************************/ + *****************************************************************/ #ifndef EIGEN_INTEROP_HEADERS_SYCL_H #define EIGEN_INTEROP_HEADERS_SYCL_H -#if defined EIGEN_USE_SYCL + namespace Eigen { +#if !defined(EIGEN_DONT_VECTORIZE_SYCL) + namespace internal { -#define SYCL_PACKET_TRAITS(packet_type, val, unpacket_type, lengths)\ - template<> struct packet_traits : default_packet_traits\ - {\ - typedef packet_type type;\ - typedef packet_type half;\ - enum {\ - Vectorizable = 1,\ - AlignedOnScalar = 1,\ - size=lengths,\ - HasHalfPacket = 0,\ - HasDiv = 1,\ - HasLog = 1,\ - HasExp = 1,\ - HasSqrt = 1,\ - HasRsqrt = 1,\ - HasSin = 1,\ - HasCos = 1,\ - HasTan = 1,\ - HasASin = 1,\ - HasACos = 1,\ - HasATan = 1,\ - HasSinh = 1,\ - HasCosh = 1,\ - HasTanh = 1,\ - HasLGamma = 0,\ - HasDiGamma = 0,\ - HasZeta = 0,\ - HasPolygamma = 0,\ - HasErf = 0,\ - HasErfc = 0,\ - HasIGamma = 0,\ - HasIGammac = 0,\ - HasBetaInc = 0,\ - HasBlend = val,\ - HasMax=1,\ - HasMin=1,\ - HasMul=1,\ - HasAdd=1,\ - HasFloor=1,\ - HasRound=1,\ - HasLog1p=1,\ - HasExpm1=1,\ - HasCeil=1,\ - };\ + +template +struct sycl_packet_traits : default_packet_traits { + enum { + Vectorizable = 1, + AlignedOnScalar = 1, + size = lengths, + HasHalfPacket = 0, + HasDiv = 1, + HasLog = 1, + HasExp = 1, + HasSqrt = 1, + HasRsqrt = 1, + HasSin = 1, + HasCos = 1, + HasTan = 1, + HasASin = 1, + HasACos = 1, + HasATan = 1, + HasSinh = 1, + HasCosh = 1, + HasTanh = 1, + HasLGamma = 0, + HasDiGamma = 0, + HasZeta = 0, + HasPolygamma = 0, + HasErf = 0, + HasErfc = 0, + HasIGamma = 0, + HasIGammac = 0, + HasBetaInc = 0, + HasBlend = has_blend, + HasMax = 1, + HasMin = 1, + HasMul = 1, + HasAdd = 1, + HasFloor = 1, + HasRound = 1, + HasLog1p = 1, + HasExpm1 = 1, + HasCeil = 1, + }; +}; + +#ifdef SYCL_DEVICE_ONLY +#define SYCL_PACKET_TRAITS(packet_type, has_blend, unpacket_type, lengths) \ + template <> \ + struct packet_traits \ + : sycl_packet_traits { \ + typedef packet_type type; \ + typedef packet_type half; \ }; SYCL_PACKET_TRAITS(cl::sycl::cl_float4, 1, float, 4) @@ -76,29 +85,137 @@ SYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, double, 2) SYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, const double, 2) #undef SYCL_PACKET_TRAITS - // Make sure this is only available when targeting a GPU: we don't want to // introduce conflicts between these packet_traits definitions and the ones // we'll use on the host side (SSE, AVX, ...) -#define SYCL_ARITHMETIC(packet_type) template<> struct is_arithmetic { enum { value = true }; }; +#define SYCL_ARITHMETIC(packet_type) \ + template <> \ + struct is_arithmetic { \ + enum { value = true }; \ + }; SYCL_ARITHMETIC(cl::sycl::cl_float4) SYCL_ARITHMETIC(cl::sycl::cl_double2) #undef SYCL_ARITHMETIC -#define SYCL_UNPACKET_TRAITS(packet_type, unpacket_type, lengths)\ -template<> struct unpacket_traits {\ - typedef unpacket_type type;\ - enum {size=lengths, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};\ - typedef packet_type half;\ -}; +#define SYCL_UNPACKET_TRAITS(packet_type, unpacket_type, lengths) \ + template <> \ + struct unpacket_traits { \ + typedef unpacket_type type; \ + enum { size = lengths, vectorizable = true, alignment = Aligned16 }; \ + typedef packet_type half; \ + }; SYCL_UNPACKET_TRAITS(cl::sycl::cl_float4, float, 4) SYCL_UNPACKET_TRAITS(cl::sycl::cl_double2, double, 2) #undef SYCL_UNPACKET_TRAITS +#endif + +} // end namespace internal + +#endif + +namespace TensorSycl { +namespace internal { + +template +struct PacketWrapper; +// This function should never get called on the device +#ifndef SYCL_DEVICE_ONLY +template +struct PacketWrapper { + typedef typename ::Eigen::internal::unpacket_traits::type + Scalar; + template + EIGEN_DEVICE_FUNC static Scalar scalarize(Index, PacketReturnType &) { + eigen_assert(false && "THERE IS NO PACKETIZE VERSION FOR THE CHOSEN TYPE"); + abort(); + } + EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type(Scalar in, + Scalar) { + return ::Eigen::internal::template plset(in); + } + EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType, Scalar *) { + eigen_assert(false && "THERE IS NO PACKETIZE VERSION FOR THE CHOSEN TYPE"); + abort(); + } +}; + +#elif defined(SYCL_DEVICE_ONLY) +template +struct PacketWrapper { + typedef typename ::Eigen::internal::unpacket_traits::type + Scalar; + template + EIGEN_DEVICE_FUNC static Scalar scalarize(Index index, PacketReturnType &in) { + switch (index) { + case 0: + return in.x(); + case 1: + return in.y(); + case 2: + return in.z(); + case 3: + return in.w(); + default: + eigen_assert(false && "INDEX MUST BE BETWEEN 0 and 3"); + abort(); + } + } + EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type( + Scalar in, Scalar other) { + return PacketReturnType(in, other, other, other); + } + EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType &lhs, Scalar *rhs) { + lhs = PacketReturnType(rhs[0], rhs[1], rhs[2], rhs[3]); + } +}; + +template +struct PacketWrapper { + typedef typename ::Eigen::internal::unpacket_traits::type + Scalar; + template + EIGEN_DEVICE_FUNC static Scalar scalarize(Index, PacketReturnType &in) { + return in; + } + EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type(Scalar in, + Scalar) { + return PacketReturnType(in); + } + EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType &lhs, Scalar *rhs) { + lhs = rhs[0]; + } +}; + +template +struct PacketWrapper { + typedef typename ::Eigen::internal::unpacket_traits::type + Scalar; + template + EIGEN_DEVICE_FUNC static Scalar scalarize(Index index, PacketReturnType &in) { + switch (index) { + case 0: + return in.x(); + case 1: + return in.y(); + default: + eigen_assert(false && "INDEX MUST BE BETWEEN 0 and 1"); + abort(); + } + } + EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type( + Scalar in, Scalar other) { + return PacketReturnType(in, other); + } + EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType &lhs, Scalar *rhs) { + lhs = PacketReturnType(rhs[0], rhs[1]); + } +}; -} // end namespace internal +#endif -} // end namespace Eigen +} // end namespace internal +} // end namespace TensorSycl +} // end namespace Eigen -#endif // EIGEN_USE_SYCL -#endif // EIGEN_INTEROP_HEADERS_SYCL_H +#endif // EIGEN_INTEROP_HEADERS_SYCL_H diff --git a/Eigen/src/Core/arch/SYCL/MathFunctions.h b/Eigen/src/Core/arch/SYCL/MathFunctions.h index 422839c6c..9e16e6c3f 100644 --- a/Eigen/src/Core/arch/SYCL/MathFunctions.h +++ b/Eigen/src/Core/arch/SYCL/MathFunctions.h @@ -16,7 +16,7 @@ * \brief: * MathFunctions * -*****************************************************************/ + *****************************************************************/ #ifndef EIGEN_MATH_FUNCTIONS_SYCL_H #define EIGEN_MATH_FUNCTIONS_SYCL_H @@ -28,194 +28,251 @@ namespace internal { // Make sure this is only available when targeting a GPU: we don't want to // introduce conflicts between these packet_traits definitions and the ones // we'll use on the host side (SSE, AVX, ...) -//#if defined(__SYCL_DEVICE_ONLY__) && defined(EIGEN_USE_SYCL) -#define SYCL_PLOG(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type plog(const packet_type& a) { return cl::sycl::log(a); } +#if defined(SYCL_DEVICE_ONLY) +#define SYCL_PLOG(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type plog( \ + const packet_type& a) { \ + return cl::sycl::log(a); \ + } SYCL_PLOG(cl::sycl::cl_float4) SYCL_PLOG(cl::sycl::cl_double2) #undef SYCL_PLOG -#define SYCL_PLOG1P(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type plog1p(const packet_type& a) { return cl::sycl::log1p(a); } +#define SYCL_PLOG1P(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type plog1p( \ + const packet_type& a) { \ + return cl::sycl::log1p(a); \ + } SYCL_PLOG1P(cl::sycl::cl_float4) SYCL_PLOG1P(cl::sycl::cl_double2) #undef SYCL_PLOG1P -#define SYCL_PLOG10(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type plog10(const packet_type& a) { return cl::sycl::log10(a); } +#define SYCL_PLOG10(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type plog10( \ + const packet_type& a) { \ + return cl::sycl::log10(a); \ + } SYCL_PLOG10(cl::sycl::cl_float4) SYCL_PLOG10(cl::sycl::cl_double2) #undef SYCL_PLOG10 -#define SYCL_PEXP(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type pexp(const packet_type& a) { return cl::sycl::exp(a); } +#define SYCL_PEXP(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pexp( \ + const packet_type& a) { \ + return cl::sycl::exp(a); \ + } SYCL_PEXP(cl::sycl::cl_float4) SYCL_PEXP(cl::sycl::cl_double2) #undef SYCL_PEXP -#define SYCL_PEXPM1(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type pexpm1(const packet_type& a) { return cl::sycl::expm1(a); } +#define SYCL_PEXPM1(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pexpm1( \ + const packet_type& a) { \ + return cl::sycl::expm1(a); \ + } SYCL_PEXPM1(cl::sycl::cl_float4) SYCL_PEXPM1(cl::sycl::cl_double2) #undef SYCL_PEXPM1 -#define SYCL_PSQRT(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type psqrt(const packet_type& a) { return cl::sycl::sqrt(a); } +#define SYCL_PSQRT(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type psqrt( \ + const packet_type& a) { \ + return cl::sycl::sqrt(a); \ + } SYCL_PSQRT(cl::sycl::cl_float4) SYCL_PSQRT(cl::sycl::cl_double2) #undef SYCL_PSQRT - -#define SYCL_PRSQRT(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type prsqrt(const packet_type& a) { return cl::sycl::rsqrt(a); } +#define SYCL_PRSQRT(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type prsqrt( \ + const packet_type& a) { \ + return cl::sycl::rsqrt(a); \ + } SYCL_PRSQRT(cl::sycl::cl_float4) SYCL_PRSQRT(cl::sycl::cl_double2) #undef SYCL_PRSQRT - /** \internal \returns the hyperbolic sine of \a a (coeff-wise) */ -#define SYCL_PSIN(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type psin(const packet_type& a) { return cl::sycl::sin(a); } +#define SYCL_PSIN(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type psin( \ + const packet_type& a) { \ + return cl::sycl::sin(a); \ + } SYCL_PSIN(cl::sycl::cl_float4) SYCL_PSIN(cl::sycl::cl_double2) #undef SYCL_PSIN - /** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */ -#define SYCL_PCOS(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type pcos(const packet_type& a) { return cl::sycl::cos(a); } +#define SYCL_PCOS(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pcos( \ + const packet_type& a) { \ + return cl::sycl::cos(a); \ + } SYCL_PCOS(cl::sycl::cl_float4) SYCL_PCOS(cl::sycl::cl_double2) #undef SYCL_PCOS /** \internal \returns the hyperbolic tan of \a a (coeff-wise) */ -#define SYCL_PTAN(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type ptan(const packet_type& a) { return cl::sycl::tan(a); } +#define SYCL_PTAN(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ptan( \ + const packet_type& a) { \ + return cl::sycl::tan(a); \ + } SYCL_PTAN(cl::sycl::cl_float4) SYCL_PTAN(cl::sycl::cl_double2) #undef SYCL_PTAN /** \internal \returns the hyperbolic sine of \a a (coeff-wise) */ -#define SYCL_PASIN(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type pasin(const packet_type& a) { return cl::sycl::asin(a); } +#define SYCL_PASIN(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pasin( \ + const packet_type& a) { \ + return cl::sycl::asin(a); \ + } SYCL_PASIN(cl::sycl::cl_float4) SYCL_PASIN(cl::sycl::cl_double2) #undef SYCL_PASIN - /** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */ -#define SYCL_PACOS(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type pacos(const packet_type& a) { return cl::sycl::acos(a); } +#define SYCL_PACOS(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pacos( \ + const packet_type& a) { \ + return cl::sycl::acos(a); \ + } SYCL_PACOS(cl::sycl::cl_float4) SYCL_PACOS(cl::sycl::cl_double2) #undef SYCL_PACOS /** \internal \returns the hyperbolic tan of \a a (coeff-wise) */ -#define SYCL_PATAN(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type patan(const packet_type& a) { return cl::sycl::atan(a); } +#define SYCL_PATAN(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type patan( \ + const packet_type& a) { \ + return cl::sycl::atan(a); \ + } SYCL_PATAN(cl::sycl::cl_float4) SYCL_PATAN(cl::sycl::cl_double2) #undef SYCL_PATAN /** \internal \returns the hyperbolic sine of \a a (coeff-wise) */ -#define SYCL_PSINH(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type psinh(const packet_type& a) { return cl::sycl::sinh(a); } +#define SYCL_PSINH(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type psinh( \ + const packet_type& a) { \ + return cl::sycl::sinh(a); \ + } SYCL_PSINH(cl::sycl::cl_float4) SYCL_PSINH(cl::sycl::cl_double2) #undef SYCL_PSINH /** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */ -#define SYCL_PCOSH(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type pcosh(const packet_type& a) { return cl::sycl::cosh(a); } +#define SYCL_PCOSH(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pcosh( \ + const packet_type& a) { \ + return cl::sycl::cosh(a); \ + } SYCL_PCOSH(cl::sycl::cl_float4) SYCL_PCOSH(cl::sycl::cl_double2) #undef SYCL_PCOSH /** \internal \returns the hyperbolic tan of \a a (coeff-wise) */ -#define SYCL_PTANH(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type ptanh(const packet_type& a) { return cl::sycl::tanh(a); } +#define SYCL_PTANH(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ptanh( \ + const packet_type& a) { \ + return cl::sycl::tanh(a); \ + } SYCL_PTANH(cl::sycl::cl_float4) SYCL_PTANH(cl::sycl::cl_double2) #undef SYCL_PTANH -#define SYCL_PCEIL(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type pceil(const packet_type& a) { return cl::sycl::ceil(a); } +#define SYCL_PCEIL(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pceil( \ + const packet_type& a) { \ + return cl::sycl::ceil(a); \ + } SYCL_PCEIL(cl::sycl::cl_float4) SYCL_PCEIL(cl::sycl::cl_double2) #undef SYCL_PCEIL - -#define SYCL_PROUND(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type pround(const packet_type& a) { return cl::sycl::round(a); } +#define SYCL_PROUND(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pround( \ + const packet_type& a) { \ + return cl::sycl::round(a); \ + } SYCL_PROUND(cl::sycl::cl_float4) SYCL_PROUND(cl::sycl::cl_double2) #undef SYCL_PROUND -#define SYCL_FLOOR(packet_type) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type pfloor(const packet_type& a) { return cl::sycl::floor(a); } +#define SYCL_FLOOR(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pfloor( \ + const packet_type& a) { \ + return cl::sycl::floor(a); \ + } SYCL_FLOOR(cl::sycl::cl_float4) SYCL_FLOOR(cl::sycl::cl_double2) #undef SYCL_FLOOR - -#define SYCL_PMIN(packet_type, expr) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type pmin(const packet_type& a, const packet_type& b) { return expr; } +#define SYCL_PMIN(packet_type, expr) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pmin( \ + const packet_type& a, const packet_type& b) { \ + return expr; \ + } SYCL_PMIN(cl::sycl::cl_float4, cl::sycl::fmin(a, b)) SYCL_PMIN(cl::sycl::cl_double2, cl::sycl::fmin(a, b)) #undef SYCL_PMIN -#define SYCL_PMAX(packet_type, expr) \ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \ -packet_type pmax(const packet_type& a, const packet_type& b) { return expr; } +#define SYCL_PMAX(packet_type, expr) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pmax( \ + const packet_type& a, const packet_type& b) { \ + return expr; \ + } SYCL_PMAX(cl::sycl::cl_float4, cl::sycl::fmax(a, b)) SYCL_PMAX(cl::sycl::cl_double2, cl::sycl::fmax(a, b)) #undef SYCL_PMAX -//#endif +#endif -} // end namespace internal +} // end namespace internal -} // end namespace Eigen +} // end namespace Eigen -#endif // EIGEN_MATH_FUNCTIONS_CUDA_H +#endif // EIGEN_MATH_FUNCTIONS_SYCL_H diff --git a/Eigen/src/Core/arch/SYCL/PacketMath.h b/Eigen/src/Core/arch/SYCL/PacketMath.h index 820a83311..a9adb64ba 100644 --- a/Eigen/src/Core/arch/SYCL/PacketMath.h +++ b/Eigen/src/Core/arch/SYCL/PacketMath.h @@ -16,85 +16,122 @@ * \brief: * PacketMath * -*****************************************************************/ + *****************************************************************/ #ifndef EIGEN_PACKET_MATH_SYCL_H #define EIGEN_PACKET_MATH_SYCL_H #include -#if defined EIGEN_USE_SYCL namespace Eigen { namespace internal { - -#define SYCL_PLOADT_RO(address_space_target)\ -template\ - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\ - ploadt_ro(typename cl::sycl::multi_ptr::type,\ - cl::sycl::access::address_space::address_space_target>::pointer_t from) {\ - typedef typename unpacket_traits::type scalar;\ - typedef cl::sycl::multi_ptr multi_ptr;\ - auto res=packet_type(static_cast::type>(0));\ - res.load(0, multi_ptr(const_cast(from)));\ - return res;\ -} +#ifdef SYCL_DEVICE_ONLY + +#define SYCL_PLOADT_RO(address_space_target) \ + template \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt_ro( \ + typename cl::sycl::multi_ptr< \ + const typename unpacket_traits::type, \ + cl::sycl::access::address_space::address_space_target>::pointer_t \ + from) { \ + typedef typename unpacket_traits::type scalar; \ + typedef cl::sycl::multi_ptr< \ + scalar, cl::sycl::access::address_space::address_space_target> \ + multi_ptr; \ + auto res = packet_type( \ + static_cast::type>(0)); \ + res.load(0, multi_ptr(const_cast(from))); \ + return res; \ + } SYCL_PLOADT_RO(global_space) SYCL_PLOADT_RO(local_space) - #undef SYCL_PLOADT_RO +#endif +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type +ploadt_ro(const Eigen::TensorSycl::internal::RangeAccess< + cl::sycl::access::mode::read_write, T>& from) { + return ploadt_ro(from.get_pointer()); +} -#define SYCL_PLOAD(address_space_target, Alignment, AlignedType)\ -template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\ - pload##AlignedType(typename cl::sycl::multi_ptr::type,\ - cl::sycl::access::address_space::address_space_target>::pointer_t from) {\ - return ploadt_ro(from);\ - } +#ifdef SYCL_DEVICE_ONLY +#define SYCL_PLOAD(address_space_target, Alignment, AlignedType) \ + template \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \ + typename cl::sycl::multi_ptr< \ + const typename unpacket_traits::type, \ + cl::sycl::access::address_space::address_space_target>::pointer_t \ + from) { \ + return ploadt_ro(from); \ + } // global space SYCL_PLOAD(global_space, Unaligned, u) SYCL_PLOAD(global_space, Aligned, ) - // local space SYCL_PLOAD(local_space, Unaligned, u) SYCL_PLOAD(local_space, Aligned, ) -// private space -//SYCL_PLOAD(private_space, Unaligned, u) -//SYCL_PLOAD(private_space, Aligned, ) - +#undef SYCL_PLOAD +#endif + +#define SYCL_PLOAD(Alignment, AlignedType) \ + template \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \ + const Eigen::TensorSycl::internal::RangeAccess< \ + cl::sycl::access::mode::read_write, \ + typename unpacket_traits::type> \ + from) { \ + return ploadt_ro(from); \ + } +SYCL_PLOAD(Unaligned, u) +SYCL_PLOAD(Aligned, ) #undef SYCL_PLOAD - +#ifdef SYCL_DEVICE_ONLY /** \internal \returns a packet version of \a *from. - * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ -#define SYCL_PLOADT(address_space_target)\ -template\ -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt(\ - typename cl::sycl::multi_ptr::type,\ - cl::sycl::access::address_space::address_space_target>::pointer_t from)\ -{\ - if(Alignment >= unpacket_traits::alignment)\ - return pload(from);\ - else\ - return ploadu(from);\ -} + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +#define SYCL_PLOADT(address_space_target) \ + template \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt( \ + typename cl::sycl::multi_ptr< \ + const typename unpacket_traits::type, \ + cl::sycl::access::address_space::address_space_target>::pointer_t \ + from) { \ + if (Alignment >= unpacket_traits::alignment) \ + return pload(from); \ + else \ + return ploadu(from); \ + } // global space SYCL_PLOADT(global_space) // local space SYCL_PLOADT(local_space) +#undef SYCL_PLOADT +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type +ploadt(const Eigen::TensorSycl::internal::RangeAccess< + cl::sycl::access::mode::read_write, + typename unpacket_traits::type>& from) { + return ploadt(from.get_pointer()); +} +#ifdef SYCL_DEVICE_ONLY -//private_space -// There is no need to specialise it for private space as it can use the GenericPacketMath version - -#define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment)\ - template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\ - ploadt_ro(const typename unpacket_traits::type * from) { \ - typedef typename unpacket_traits::type scalar;\ - auto res=packet_type(static_cast(0));\ - res. template load(0, const_cast(from));\ - return res;\ +// private_space +#define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type \ + ploadt_ro( \ + const typename unpacket_traits::type* from) { \ + typedef typename unpacket_traits::type scalar; \ + auto res = packet_type(static_cast(0)); \ + res.template load( \ + 0, const_cast(from)); \ + return res; \ } SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Aligned) @@ -102,37 +139,42 @@ SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Aligned) SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Unaligned) SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Unaligned) - -#define SYCL_PLOAD_SPECIAL(packet_type, alignment_type)\ -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\ - pload##alignment_type(const typename unpacket_traits::type * from) { \ - typedef typename unpacket_traits::type scalar;\ - auto res=packet_type(static_cast(0));\ - res. template load(0, const_cast(from));\ - return res;\ +#define SYCL_PLOAD_SPECIAL(packet_type, alignment_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##alignment_type( \ + const typename unpacket_traits::type* from) { \ + typedef typename unpacket_traits::type scalar; \ + auto res = packet_type(static_cast(0)); \ + res.template load( \ + 0, const_cast(from)); \ + return res; \ } -SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4,) -SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2,) +SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, ) +SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, ) SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, u) SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, u) #undef SYCL_PLOAD_SPECIAL -#define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment)\ -template<>\ - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \ - typename cl::sycl::multi_ptr::pointer_t to, \ - const packet_type& from) {\ - typedef cl::sycl::multi_ptr multi_ptr;\ - from.store(0, multi_ptr(to));\ -} +#define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \ + typename cl::sycl::multi_ptr< \ + scalar, \ + cl::sycl::access::address_space::address_space_target>::pointer_t \ + to, \ + const packet_type& from) { \ + typedef cl::sycl::multi_ptr< \ + scalar, cl::sycl::access::address_space::address_space_target> \ + multi_ptr; \ + from.store(0, multi_ptr(to)); \ + } // global space SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, ) SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, u) SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, ) SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, u) - SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, ) SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, u) SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, ) @@ -142,36 +184,34 @@ SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, ) SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, u) SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, ) SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, u) +#undef SYCL_PSTORE + +#define SYCL_PSTORE_T(address_space_target) \ + template \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret( \ + typename cl::sycl::multi_ptr< \ + scalar, \ + cl::sycl::access::address_space::address_space_target>::pointer_t \ + to, \ + const packet_type& from) { \ + if (Alignment) \ + pstore(to, from); \ + else \ + pstoreu(to, from); \ + } +SYCL_PSTORE_T(global_space) -#define SYCL_PSTORE_T(scalar, packet_type, Alignment)\ -template<>\ -EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(\ - scalar* to,\ - const packet_type& from) {\ - if(Alignment)\ - pstore(to, from);\ - else\ - pstoreu(to,from);\ -} - - -SYCL_PSTORE_T(float, cl::sycl::cl_float4, Aligned) - -SYCL_PSTORE_T(float, cl::sycl::cl_float4, Unaligned) - -SYCL_PSTORE_T(double, cl::sycl::cl_double2, Aligned) - -SYCL_PSTORE_T(double, cl::sycl::cl_double2, Unaligned) - +SYCL_PSTORE_T(local_space) #undef SYCL_PSTORE_T -#define SYCL_PSET1(packet_type)\ -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1(\ - const typename unpacket_traits::type& from) {\ - return packet_type(from);\ -} +#define SYCL_PSET1(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1( \ + const typename unpacket_traits::type& from) { \ + return packet_type(from); \ + } // global space SYCL_PSET1(cl::sycl::cl_float4) @@ -179,280 +219,343 @@ SYCL_PSET1(cl::sycl::cl_double2) #undef SYCL_PSET1 - -template struct get_base_packet { -template - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_ploaddup(sycl_multi_pointer ) {} +template +struct get_base_packet { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type + get_ploaddup(sycl_multi_pointer) {} template - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_pgather(sycl_multi_pointer , Index ) {} + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type + get_pgather(sycl_multi_pointer, Index) {} }; -template <> struct get_base_packet { +template <> +struct get_base_packet { template - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(sycl_multi_pointer from) { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup( + sycl_multi_pointer from) { return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]); } template - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(sycl_multi_pointer from, Index stride) { - return cl::sycl::cl_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]); + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather( + sycl_multi_pointer from, Index stride) { + return cl::sycl::cl_float4(from[0 * stride], from[1 * stride], + from[2 * stride], from[3 * stride]); } template - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to , const cl::sycl::cl_float4& from, Index stride) { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter( + sycl_multi_pointer to, const cl::sycl::cl_float4& from, Index stride) { auto tmp = stride; to[0] = from.x(); to[tmp] = from.y(); to[tmp += stride] = from.z(); to[tmp += stride] = from.w(); - } - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(const float& a) { - return cl::sycl::cl_float4(static_cast(a), static_cast(a+1), static_cast(a+2), static_cast(a+3)); - } + } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset( + const float& a) { + return cl::sycl::cl_float4(static_cast(a), static_cast(a + 1), + static_cast(a + 2), + static_cast(a + 3)); + } }; -template <> struct get_base_packet { +template <> +struct get_base_packet { template - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_ploaddup(const sycl_multi_pointer from) { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 + get_ploaddup(const sycl_multi_pointer from) { return cl::sycl::cl_double2(from[0], from[0]); } template - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(const sycl_multi_pointer from, Index stride) { - return cl::sycl::cl_double2(from[0*stride], from[1*stride]); + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather( + const sycl_multi_pointer from, Index stride) { + return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]); } template - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to , const cl::sycl::cl_double2& from, Index stride) { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter( + sycl_multi_pointer to, const cl::sycl::cl_double2& from, Index stride) { to[0] = from.x(); to[stride] = from.y(); } - static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(const double& a) { - return cl::sycl::cl_double2(static_cast(a), static_cast(a + 1)); + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset( + const double& a) { + return cl::sycl::cl_double2(static_cast(a), + static_cast(a + 1)); } }; -#define SYCL_PLOAD_DUP(address_space_target)\ -template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \ -ploaddup(typename cl::sycl::multi_ptr::type,\ - cl::sycl::access::address_space::address_space_target>::pointer_t from)\ -{\ - return get_base_packet::get_ploaddup(from); \ -} +#define SYCL_PLOAD_DUP(address_space_target) \ + template \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup( \ + typename cl::sycl::multi_ptr< \ + const typename unpacket_traits::type, \ + cl::sycl::access::address_space::address_space_target>::pointer_t \ + from) { \ + return get_base_packet::get_ploaddup(from); \ + } // global space SYCL_PLOAD_DUP(global_space) // local_space SYCL_PLOAD_DUP(local_space) -// private_space -//SYCL_PLOAD_DUP(private_space) #undef SYCL_PLOAD_DUP -#define SYCL_PLOAD_DUP_SPECILIZE(packet_type)\ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \ -ploaddup(const typename unpacket_traits::type * from)\ -{ \ - return get_base_packet::get_ploaddup(from); \ -} +#define SYCL_PLOAD_DUP_SPECILIZE(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup( \ + const typename unpacket_traits::type* from) { \ + return get_base_packet::get_ploaddup(from); \ + } SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4) SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2) #undef SYCL_PLOAD_DUP_SPECILIZE -#define SYCL_PLSET(packet_type)\ -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset(const typename unpacket_traits::type& a) {\ - return get_base_packet::set_plset(a);\ -} +#define SYCL_PLSET(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset( \ + const typename unpacket_traits::type& a) { \ + return get_base_packet::set_plset(a); \ + } SYCL_PLSET(cl::sycl::cl_float4) SYCL_PLSET(cl::sycl::cl_double2) #undef SYCL_PLSET - -#define SYCL_PGATHER(address_space_target)\ -template EIGEN_DEVICE_FUNC inline packet_type pgather(\ - typename cl::sycl::multi_ptr::type,\ - cl::sycl::access::address_space::address_space_target>::pointer_t from, Index stride) {\ - return get_base_packet::get_pgather(from, stride); \ -} +#define SYCL_PGATHER(address_space_target) \ + template \ + EIGEN_DEVICE_FUNC inline packet_type pgather( \ + typename cl::sycl::multi_ptr< \ + const typename unpacket_traits::type, \ + cl::sycl::access::address_space::address_space_target>::pointer_t \ + from, \ + Index stride) { \ + return get_base_packet::get_pgather(from, stride); \ + } // global space SYCL_PGATHER(global_space) // local space SYCL_PGATHER(local_space) -// private space -//SYCL_PGATHER(private_space) #undef SYCL_PGATHER - -#define SYCL_PGATHER_SPECILIZE(scalar, packet_type)\ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \ -pgather(const typename unpacket_traits::type * from, Index stride)\ -{ \ - return get_base_packet::get_pgather(from, stride); \ -} +#define SYCL_PGATHER_SPECILIZE(scalar, packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \ + pgather( \ + const typename unpacket_traits::type* from, Index stride) { \ + return get_base_packet::get_pgather(from, stride); \ + } SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4) SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2) #undef SYCL_PGATHER_SPECILIZE -#define SYCL_PSCATTER(address_space_target)\ -template EIGEN_DEVICE_FUNC inline void pscatter(\ - typename cl::sycl::multi_ptr::type,\ - cl::sycl::access::address_space::address_space_target>::pointer_t to,\ - const packet_type& from, Index stride) {\ - get_base_packet::set_pscatter(to, from, stride);\ -} +#define SYCL_PSCATTER(address_space_target) \ + template \ + EIGEN_DEVICE_FUNC inline void pscatter( \ + typename cl::sycl::multi_ptr< \ + typename unpacket_traits::type, \ + cl::sycl::access::address_space::address_space_target>::pointer_t \ + to, \ + const packet_type& from, Index stride) { \ + get_base_packet::set_pscatter(to, from, stride); \ + } // global space SYCL_PSCATTER(global_space) // local space SYCL_PSCATTER(local_space) -// private space -//SYCL_PSCATTER(private_space) #undef SYCL_PSCATTER - - -#define SYCL_PSCATTER_SPECILIZE(scalar, packet_type)\ -template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void \ -pscatter(typename unpacket_traits::type * to, const packet_type& from, Index stride)\ -{ \ - get_base_packet::set_pscatter(to, from, stride);\ -} +#define SYCL_PSCATTER_SPECILIZE(scalar, packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter( \ + typename unpacket_traits::type * to, \ + const packet_type& from, Index stride) { \ + get_base_packet::set_pscatter(to, from, stride); \ + } SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4) SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2) #undef SYCL_PSCATTER_SPECILIZE - -#define SYCL_PMAD(packet_type)\ -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd( const packet_type& a,\ - const packet_type& b, const packet_type& c){\ - return cl::sycl::mad(a,b,c);\ -} +#define SYCL_PMAD(packet_type) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd( \ + const packet_type& a, const packet_type& b, const packet_type& c) { \ + return cl::sycl::mad(a, b, c); \ + } SYCL_PMAD(cl::sycl::cl_float4) SYCL_PMAD(cl::sycl::cl_double2) #undef SYCL_PMAD - - -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst(const cl::sycl::cl_float4& a) { +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst( + const cl::sycl::cl_float4& a) { return a.x(); } -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst(const cl::sycl::cl_double2& a) { +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst( + const cl::sycl::cl_double2& a) { return a.x(); } -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux(const cl::sycl::cl_float4& a) { +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux( + const cl::sycl::cl_float4& a) { return a.x() + a.y() + a.z() + a.w(); } -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux(const cl::sycl::cl_double2& a) { +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux( + const cl::sycl::cl_double2& a) { return a.x() + a.y(); } -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max(const cl::sycl::cl_float4& a) { - return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()), cl::sycl::fmax(a.z(), a.w())); +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max( + const cl::sycl::cl_float4& a) { + return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()), + cl::sycl::fmax(a.z(), a.w())); } -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max(const cl::sycl::cl_double2& a) { +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max( + const cl::sycl::cl_double2& a) { return cl::sycl::fmax(a.x(), a.y()); } -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min(const cl::sycl::cl_float4& a) { - return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()), cl::sycl::fmin(a.z(), a.w())); +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min( + const cl::sycl::cl_float4& a) { + return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()), + cl::sycl::fmin(a.z(), a.w())); } -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min(const cl::sycl::cl_double2& a) { +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min( + const cl::sycl::cl_double2& a) { return cl::sycl::fmin(a.x(), a.y()); } -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul(const cl::sycl::cl_float4& a) { +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul( + const cl::sycl::cl_float4& a) { return a.x() * a.y() * a.z() * a.w(); } -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul(const cl::sycl::cl_double2& a) { +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul( + const cl::sycl::cl_double2& a) { return a.x() * a.y(); } -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pabs(const cl::sycl::cl_float4& a) { - return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()), cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w())); +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 +pabs(const cl::sycl::cl_float4& a) { + return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()), + cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w())); } -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pabs(const cl::sycl::cl_double2& a) { +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 +pabs(const cl::sycl::cl_double2& a) { return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y())); } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void -ptranspose(PacketBlock& kernel) { +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose( + PacketBlock& kernel) { float tmp = kernel.packet[0].y(); kernel.packet[0].y() = kernel.packet[1].x(); kernel.packet[1].x() = tmp; -// std::swap(kernel.packet[0].y(), kernel.packet[1].x()); tmp = kernel.packet[0].z(); kernel.packet[0].z() = kernel.packet[2].x(); kernel.packet[2].x() = tmp; - //std::swap(kernel.packet[0].z(), kernel.packet[2].x()); tmp = kernel.packet[0].w(); kernel.packet[0].w() = kernel.packet[3].x(); kernel.packet[3].x() = tmp; - //std::swap(kernel.packet[0].w(), kernel.packet[3].x()); - tmp = kernel.packet[1].z(); kernel.packet[1].z() = kernel.packet[2].y(); kernel.packet[2].y() = tmp; -// std::swap(kernel.packet[1].z(), kernel.packet[2].y()); tmp = kernel.packet[1].w(); kernel.packet[1].w() = kernel.packet[3].y(); kernel.packet[3].y() = tmp; -// std::swap(kernel.packet[1].w(), kernel.packet[3].y()); tmp = kernel.packet[2].w(); kernel.packet[2].w() = kernel.packet[3].z(); kernel.packet[3].z() = tmp; -// std::swap(kernel.packet[2].w(), kernel.packet[3].z()); - } - EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void -ptranspose(PacketBlock& kernel) { +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose( + PacketBlock& kernel) { double tmp = kernel.packet[0].y(); kernel.packet[0].y() = kernel.packet[1].x(); kernel.packet[1].x() = tmp; -//std::swap(kernel.packet[0].y(), kernel.packet[1].x()); } - -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 -pblend(const Selector::size>& ifPacket, - const cl::sycl::cl_float4& thenPacket, const cl::sycl::cl_float4& elsePacket) { - cl::sycl::cl_int4 condition(ifPacket.select[0] ? 0 : -1, - ifPacket.select[1] ? 0 : -1, - ifPacket.select[2] ? 0 : -1, - ifPacket.select[3] ? 0 : -1); +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend( + const Selector::size>& ifPacket, + const cl::sycl::cl_float4& thenPacket, + const cl::sycl::cl_float4& elsePacket) { + cl::sycl::cl_int4 condition( + ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1, + ifPacket.select[2] ? 0 : -1, ifPacket.select[3] ? 0 : -1); return cl::sycl::select(thenPacket, elsePacket, condition); } -template<> inline cl::sycl::cl_double2 -pblend(const Selector::size>& ifPacket, - const cl::sycl::cl_double2& thenPacket, const cl::sycl::cl_double2& elsePacket) { +template <> +inline cl::sycl::cl_double2 pblend( + const Selector::size>& ifPacket, + const cl::sycl::cl_double2& thenPacket, + const cl::sycl::cl_double2& elsePacket) { cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1); return cl::sycl::select(thenPacket, elsePacket, condition); } +#endif // SYCL_DEVICE_ONLY + +#define SYCL_PSTORE(alignment) \ + template \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \ + const Eigen::TensorSycl::internal::RangeAccess< \ + cl::sycl::access::mode::read_write, \ + typename unpacket_traits::type>& to, \ + const packet_type& from) { \ + pstore##alignment(to.get_pointer(), from); \ + } + +// global space +SYCL_PSTORE() +SYCL_PSTORE(u) + +#undef SYCL_PSTORE + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret( + Eigen::TensorSycl::internal::RangeAccess< + cl::sycl::access::mode::read_write, + typename unpacket_traits::type> + to, + const packet_type& from) { + pstoret(to.get_pointer(), from); +} -} // end namespace internal +} // end namespace internal -} // end namespace Eigen +} // end namespace Eigen -#endif // EIGEN_USE_SYCL -#endif // EIGEN_PACKET_MATH_SYCL_H +#endif // EIGEN_PACKET_MATH_SYCL_H diff --git a/Eigen/src/Core/arch/SYCL/TypeCasting.h b/Eigen/src/Core/arch/SYCL/TypeCasting.h index dedd5c84a..9208ab21d 100644 --- a/Eigen/src/Core/arch/SYCL/TypeCasting.h +++ b/Eigen/src/Core/arch/SYCL/TypeCasting.h @@ -16,7 +16,7 @@ * \brief: * TypeCasting * -*****************************************************************/ + *****************************************************************/ #ifndef EIGEN_TYPE_CASTING_SYCL_H #define EIGEN_TYPE_CASTING_SYCL_H @@ -24,66 +24,62 @@ namespace Eigen { namespace internal { -#ifdef __SYCL_DEVICE_ONLY__ +#ifdef SYCL_DEVICE_ONLY template <> struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 1, - TgtCoeffRatio = 1 - }; + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; }; -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_int4 pcast(const cl::sycl::cl_float4& a) { - return a. template convert(); +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_int4 +pcast(const cl::sycl::cl_float4& a) { + return a + .template convert(); } - template <> struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 1, - TgtCoeffRatio = 1 - }; + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 }; }; -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pcast(const cl::sycl::cl_int4& a) { - return a. template convert(); +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 +pcast(const cl::sycl::cl_int4& a) { + return a.template convert(); } template <> struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 2, - TgtCoeffRatio = 1 - }; + enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 }; }; -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pcast(const cl::sycl::cl_double2& a, const cl::sycl::cl_double2& b) { - auto a1=a. template convert(); - auto b1=b. template convert(); +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 +pcast( + const cl::sycl::cl_double2& a, const cl::sycl::cl_double2& b) { + auto a1 = a.template convert(); + auto b1 = b.template convert(); return cl::sycl::float4(a1.x(), a1.y(), b1.x(), b1.y()); } template <> struct type_casting_traits { - enum { - VectorizedCast = 1, - SrcCoeffRatio = 1, - TgtCoeffRatio = 2 - }; + enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 }; }; -template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pcast(const cl::sycl::cl_float4& a) { +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 +pcast(const cl::sycl::cl_float4& a) { // Simply discard the second half of the input return cl::sycl::cl_double2(a.x(), a.y()); } #endif -} // end namespace internal +} // end namespace internal -} // end namespace Eigen +} // end namespace Eigen -#endif // EIGEN_TYPE_CASTING_SYCL_H +#endif // EIGEN_TYPE_CASTING_SYCL_H diff --git a/Eigen/src/Core/functors/UnaryFunctors.h b/Eigen/src/Core/functors/UnaryFunctors.h index 56522a38f..d17aaf9c9 100644 --- a/Eigen/src/Core/functors/UnaryFunctors.h +++ b/Eigen/src/Core/functors/UnaryFunctors.h @@ -762,7 +762,7 @@ template struct scalar_isnan_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_isnan_op) typedef bool result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) return numext::isnan(a); #else return (numext::isnan)(a); @@ -786,7 +786,7 @@ template struct scalar_isinf_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_isinf_op) typedef bool result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) return numext::isinf(a); #else return (numext::isinf)(a); @@ -810,7 +810,7 @@ template struct scalar_isfinite_op { EIGEN_EMPTY_STRUCT_CTOR(scalar_isfinite_op) typedef bool result_type; EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type operator() (const Scalar& a) const { -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) return numext::isfinite(a); #else return (numext::isfinite)(a); diff --git a/Eigen/src/Core/util/Macros.h b/Eigen/src/Core/util/Macros.h index 31f91bd49..bea3a1432 100644 --- a/Eigen/src/Core/util/Macros.h +++ b/Eigen/src/Core/util/Macros.h @@ -497,6 +497,12 @@ // #endif +#if defined(EIGEN_USE_SYCL) && defined(__SYCL_DEVICE_ONLY__) +// EIGEN_USE_SYCL is a user-defined macro while __SYCL_DEVICE_ONLY__ is a compiler-defined macro. +// In most cases we want to check if both macros are defined which can be done using the define below. +#define SYCL_DEVICE_ONLY +#endif + //------------------------------------------------------------------------------------------ // Detect Compiler/Architecture/OS specific features //------------------------------------------------------------------------------------------ @@ -583,7 +589,7 @@ ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901)) \ || (defined(__GNUC__) && defined(_GLIBCXX_USE_C99)) \ || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) \ - || (EIGEN_COMP_MSVC >= 1900) || defined(__SYCL_DEVICE_ONLY__)) + || (EIGEN_COMP_MSVC >= 1900) || defined(SYCL_DEVICE_ONLY)) #define EIGEN_HAS_C99_MATH 1 #else #define EIGEN_HAS_C99_MATH 0 @@ -639,7 +645,7 @@ // ^^ Disable the use of variadic templates when compiling with versions of nvcc older than 8.0 on ARM devices: // this prevents nvcc from crashing when compiling Eigen on Tegra X1 #define EIGEN_HAS_VARIADIC_TEMPLATES 1 -#elif EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) && defined(__SYCL_DEVICE_ONLY__) +#elif EIGEN_MAX_CPP_VER>=11 && (__cplusplus > 199711L || EIGEN_COMP_MSVC >= 1900) && defined(SYCL_DEVICE_ONLY) #define EIGEN_HAS_VARIADIC_TEMPLATES 1 #else #define EIGEN_HAS_VARIADIC_TEMPLATES 0 @@ -791,7 +797,7 @@ // Eval.h:91: sorry, unimplemented: inlining failed in call to 'const Eigen::Eval Eigen::MatrixBase::eval() const' // : function body not available // See also bug 1367 -#if EIGEN_GNUC_AT_LEAST(4,2) +#if EIGEN_GNUC_AT_LEAST(4,2) && !defined(SYCL_DEVICE_ONLY) #define EIGEN_ALWAYS_INLINE __attribute__((always_inline)) inline #else #define EIGEN_ALWAYS_INLINE EIGEN_STRONG_INLINE @@ -814,7 +820,7 @@ // GPU stuff // Disable some features when compiling with GPU compilers (NVCC/clang-cuda/SYCL/HIPCC) -#if defined(EIGEN_CUDACC) || defined(__SYCL_DEVICE_ONLY__) || defined(EIGEN_HIPCC) +#if defined(EIGEN_CUDACC) || defined(SYCL_DEVICE_ONLY) || defined(EIGEN_HIPCC) // Do not try asserts on device code #ifndef EIGEN_NO_DEBUG #define EIGEN_NO_DEBUG @@ -829,9 +835,14 @@ #endif #endif +#if defined(SYCL_DEVICE_ONLY) + #ifndef EIGEN_DONT_VECTORIZE + #define EIGEN_DONT_VECTORIZE + #endif + #define EIGEN_DEVICE_FUNC __attribute__((always_inline)) // All functions callable from CUDA/HIP code must be qualified with __device__ -#ifdef EIGEN_GPUCC - #define EIGEN_DEVICE_FUNC __host__ __device__ +#elif defined(EIGEN_GPUCC) + #define EIGEN_DEVICE_FUNC __host__ __device__ #else #define EIGEN_DEVICE_FUNC #endif @@ -852,8 +863,12 @@ // eigen_plain_assert is where we implement the workaround for the assert() bug in GCC <= 4.3, see bug 89 #ifdef EIGEN_NO_DEBUG - #define eigen_plain_assert(x) -#else + #ifdef SYCL_DEVICE_ONLY // used to silence the warning on SYCL device + #define eigen_plain_assert(x) EIGEN_UNUSED_VARIABLE(x) + #else + #define eigen_plain_assert(x) + #endif +#else #if EIGEN_SAFE_TO_USE_STANDARD_ASSERT_MACRO namespace Eigen { namespace internal { @@ -1211,7 +1226,7 @@ bool all(T t, Ts ... ts){ return t && all(ts...); } #endif // Wrapping #pragma unroll in a macro since it is required for SYCL -#if defined(__SYCL_DEVICE_ONLY__) +#if defined(SYCL_DEVICE_ONLY) #if defined(_MSC_VER) #define EIGEN_UNROLL_LOOP __pragma(unroll) #else diff --git a/test/main.h b/test/main.h index 93e894460..4c1733b1f 100644 --- a/test/main.h +++ b/test/main.h @@ -70,7 +70,7 @@ // protected by parenthesis against macro expansion, the min()/max() macros // are defined here and any not-parenthesized min/max call will cause a // compiler error. -#if !defined(__HIPCC__) +#if !defined(__HIPCC__) && !defined(EIGEN_USE_SYCL) // // HIP header files include the following files // @@ -277,7 +277,7 @@ namespace Eigen } #endif //EIGEN_EXCEPTIONS - #elif !defined(__CUDACC__) && !defined(__HIPCC__) && !defined(__SYCL_DEVICE_ONLY__) // EIGEN_DEBUG_ASSERTS + #elif !defined(__CUDACC__) && !defined(__HIPCC__) && !defined(SYCL_DEVICE_ONLY) // EIGEN_DEBUG_ASSERTS // see bug 89. The copy_bool here is working around a bug in gcc <= 4.3 #define eigen_assert(a) \ if( (!Eigen::internal::copy_bool(a)) && (!no_more_assert) )\ @@ -334,7 +334,7 @@ namespace Eigen std::cout << "Can't VERIFY_RAISES_STATIC_ASSERT( " #a " ) with exceptions disabled\n"; #endif - #if !defined(__CUDACC__) && !defined(__HIPCC__) && !defined(__SYCL_DEVICE_ONLY__) + #if !defined(__CUDACC__) && !defined(__HIPCC__) && !defined(SYCL_DEVICE_ONLY) #define EIGEN_USE_CUSTOM_ASSERT #endif -- cgit v1.2.3