aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/SYCL
diff options
context:
space:
mode:
authorGravatar Mehdi Goli <mehdi.goli@codeplay.com>2019-06-27 12:25:09 +0100
committerGravatar Mehdi Goli <mehdi.goli@codeplay.com>2019-06-27 12:25:09 +0100
commit16a56b2dddbfaf2d4b81d62be5e3139f12783ac8 (patch)
tree9ce9ce2f27b9cfadfc34004aecede743e65b6d51 /Eigen/src/Core/arch/SYCL
parentadec097c61bd2ff049378b063a4665910c1ed5cc (diff)
[SYCL] This PR adds the minimum modifications to Eigen core required to run Eigen unsupported modules on devices supporting SYCL.
* Adding SYCL memory model * Enabling/Disabling SYCL backend in Core * Supporting Vectorization
Diffstat (limited to 'Eigen/src/Core/arch/SYCL')
-rw-r--r--Eigen/src/Core/arch/SYCL/InteropHeaders.h233
-rw-r--r--Eigen/src/Core/arch/SYCL/MathFunctions.h207
-rw-r--r--Eigen/src/Core/arch/SYCL/PacketMath.h537
-rw-r--r--Eigen/src/Core/arch/SYCL/TypeCasting.h64
4 files changed, 657 insertions, 384 deletions
diff --git a/Eigen/src/Core/arch/SYCL/InteropHeaders.h b/Eigen/src/Core/arch/SYCL/InteropHeaders.h
index b09d45ea1..ef66fc7de 100644
--- a/Eigen/src/Core/arch/SYCL/InteropHeaders.h
+++ b/Eigen/src/Core/arch/SYCL/InteropHeaders.h
@@ -16,58 +16,67 @@
* \brief:
* InteropHeaders
*
-*****************************************************************/
+ *****************************************************************/
#ifndef EIGEN_INTEROP_HEADERS_SYCL_H
#define EIGEN_INTEROP_HEADERS_SYCL_H
-#if defined EIGEN_USE_SYCL
+
namespace Eigen {
+#if !defined(EIGEN_DONT_VECTORIZE_SYCL)
+
namespace internal {
-#define SYCL_PACKET_TRAITS(packet_type, val, unpacket_type, lengths)\
- template<> struct packet_traits<unpacket_type> : default_packet_traits\
- {\
- typedef packet_type type;\
- typedef packet_type half;\
- enum {\
- Vectorizable = 1,\
- AlignedOnScalar = 1,\
- size=lengths,\
- HasHalfPacket = 0,\
- HasDiv = 1,\
- HasLog = 1,\
- HasExp = 1,\
- HasSqrt = 1,\
- HasRsqrt = 1,\
- HasSin = 1,\
- HasCos = 1,\
- HasTan = 1,\
- HasASin = 1,\
- HasACos = 1,\
- HasATan = 1,\
- HasSinh = 1,\
- HasCosh = 1,\
- HasTanh = 1,\
- HasLGamma = 0,\
- HasDiGamma = 0,\
- HasZeta = 0,\
- HasPolygamma = 0,\
- HasErf = 0,\
- HasErfc = 0,\
- HasIGamma = 0,\
- HasIGammac = 0,\
- HasBetaInc = 0,\
- HasBlend = val,\
- HasMax=1,\
- HasMin=1,\
- HasMul=1,\
- HasAdd=1,\
- HasFloor=1,\
- HasRound=1,\
- HasLog1p=1,\
- HasExpm1=1,\
- HasCeil=1,\
- };\
+
+template <int has_blend, int lengths>
+struct sycl_packet_traits : default_packet_traits {
+ enum {
+ Vectorizable = 1,
+ AlignedOnScalar = 1,
+ size = lengths,
+ HasHalfPacket = 0,
+ HasDiv = 1,
+ HasLog = 1,
+ HasExp = 1,
+ HasSqrt = 1,
+ HasRsqrt = 1,
+ HasSin = 1,
+ HasCos = 1,
+ HasTan = 1,
+ HasASin = 1,
+ HasACos = 1,
+ HasATan = 1,
+ HasSinh = 1,
+ HasCosh = 1,
+ HasTanh = 1,
+ HasLGamma = 0,
+ HasDiGamma = 0,
+ HasZeta = 0,
+ HasPolygamma = 0,
+ HasErf = 0,
+ HasErfc = 0,
+ HasIGamma = 0,
+ HasIGammac = 0,
+ HasBetaInc = 0,
+ HasBlend = has_blend,
+ HasMax = 1,
+ HasMin = 1,
+ HasMul = 1,
+ HasAdd = 1,
+ HasFloor = 1,
+ HasRound = 1,
+ HasLog1p = 1,
+ HasExpm1 = 1,
+ HasCeil = 1,
+ };
+};
+
+#ifdef SYCL_DEVICE_ONLY
+#define SYCL_PACKET_TRAITS(packet_type, has_blend, unpacket_type, lengths) \
+ template <> \
+ struct packet_traits<unpacket_type> \
+ : sycl_packet_traits<has_blend, lengths> { \
+ typedef packet_type type; \
+ typedef packet_type half; \
};
SYCL_PACKET_TRAITS(cl::sycl::cl_float4, 1, float, 4)
@@ -76,29 +85,137 @@ SYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, double, 2)
SYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, const double, 2)
#undef SYCL_PACKET_TRAITS
-
// Make sure this is only available when targeting a GPU: we don't want to
// introduce conflicts between these packet_traits definitions and the ones
// we'll use on the host side (SSE, AVX, ...)
-#define SYCL_ARITHMETIC(packet_type) template<> struct is_arithmetic<packet_type> { enum { value = true }; };
+#define SYCL_ARITHMETIC(packet_type) \
+ template <> \
+ struct is_arithmetic<packet_type> { \
+ enum { value = true }; \
+ };
SYCL_ARITHMETIC(cl::sycl::cl_float4)
SYCL_ARITHMETIC(cl::sycl::cl_double2)
#undef SYCL_ARITHMETIC
-#define SYCL_UNPACKET_TRAITS(packet_type, unpacket_type, lengths)\
-template<> struct unpacket_traits<packet_type> {\
- typedef unpacket_type type;\
- enum {size=lengths, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false};\
- typedef packet_type half;\
-};
+#define SYCL_UNPACKET_TRAITS(packet_type, unpacket_type, lengths) \
+ template <> \
+ struct unpacket_traits<packet_type> { \
+ typedef unpacket_type type; \
+ enum { size = lengths, vectorizable = true, alignment = Aligned16 }; \
+ typedef packet_type half; \
+ };
SYCL_UNPACKET_TRAITS(cl::sycl::cl_float4, float, 4)
SYCL_UNPACKET_TRAITS(cl::sycl::cl_double2, double, 2)
#undef SYCL_UNPACKET_TRAITS
+#endif
+
+} // end namespace internal
+
+#endif
+
+namespace TensorSycl {
+namespace internal {
+
+template <typename PacketReturnType, int PacketSize>
+struct PacketWrapper;
+// This function should never get called on the device
+#ifndef SYCL_DEVICE_ONLY
+template <typename PacketReturnType, int PacketSize>
+struct PacketWrapper {
+ typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type
+ Scalar;
+ template <typename Index>
+ EIGEN_DEVICE_FUNC static Scalar scalarize(Index, PacketReturnType &) {
+ eigen_assert(false && "THERE IS NO PACKETIZE VERSION FOR THE CHOSEN TYPE");
+ abort();
+ }
+ EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type(Scalar in,
+ Scalar) {
+ return ::Eigen::internal::template plset<PacketReturnType>(in);
+ }
+ EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType, Scalar *) {
+ eigen_assert(false && "THERE IS NO PACKETIZE VERSION FOR THE CHOSEN TYPE");
+ abort();
+ }
+};
+
+#elif defined(SYCL_DEVICE_ONLY)
+template <typename PacketReturnType>
+struct PacketWrapper<PacketReturnType, 4> {
+ typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type
+ Scalar;
+ template <typename Index>
+ EIGEN_DEVICE_FUNC static Scalar scalarize(Index index, PacketReturnType &in) {
+ switch (index) {
+ case 0:
+ return in.x();
+ case 1:
+ return in.y();
+ case 2:
+ return in.z();
+ case 3:
+ return in.w();
+ default:
+ eigen_assert(false && "INDEX MUST BE BETWEEN 0 and 3");
+ abort();
+ }
+ }
+ EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type(
+ Scalar in, Scalar other) {
+ return PacketReturnType(in, other, other, other);
+ }
+ EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType &lhs, Scalar *rhs) {
+ lhs = PacketReturnType(rhs[0], rhs[1], rhs[2], rhs[3]);
+ }
+};
+
+template <typename PacketReturnType>
+struct PacketWrapper<PacketReturnType, 1> {
+ typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type
+ Scalar;
+ template <typename Index>
+ EIGEN_DEVICE_FUNC static Scalar scalarize(Index, PacketReturnType &in) {
+ return in;
+ }
+ EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type(Scalar in,
+ Scalar) {
+ return PacketReturnType(in);
+ }
+ EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType &lhs, Scalar *rhs) {
+ lhs = rhs[0];
+ }
+};
+
+template <typename PacketReturnType>
+struct PacketWrapper<PacketReturnType, 2> {
+ typedef typename ::Eigen::internal::unpacket_traits<PacketReturnType>::type
+ Scalar;
+ template <typename Index>
+ EIGEN_DEVICE_FUNC static Scalar scalarize(Index index, PacketReturnType &in) {
+ switch (index) {
+ case 0:
+ return in.x();
+ case 1:
+ return in.y();
+ default:
+ eigen_assert(false && "INDEX MUST BE BETWEEN 0 and 1");
+ abort();
+ }
+ }
+ EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type(
+ Scalar in, Scalar other) {
+ return PacketReturnType(in, other);
+ }
+ EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType &lhs, Scalar *rhs) {
+ lhs = PacketReturnType(rhs[0], rhs[1]);
+ }
+};
-} // end namespace internal
+#endif
-} // end namespace Eigen
+} // end namespace internal
+} // end namespace TensorSycl
+} // end namespace Eigen
-#endif // EIGEN_USE_SYCL
-#endif // EIGEN_INTEROP_HEADERS_SYCL_H
+#endif // EIGEN_INTEROP_HEADERS_SYCL_H
diff --git a/Eigen/src/Core/arch/SYCL/MathFunctions.h b/Eigen/src/Core/arch/SYCL/MathFunctions.h
index 422839c6c..9e16e6c3f 100644
--- a/Eigen/src/Core/arch/SYCL/MathFunctions.h
+++ b/Eigen/src/Core/arch/SYCL/MathFunctions.h
@@ -16,7 +16,7 @@
* \brief:
* MathFunctions
*
-*****************************************************************/
+ *****************************************************************/
#ifndef EIGEN_MATH_FUNCTIONS_SYCL_H
#define EIGEN_MATH_FUNCTIONS_SYCL_H
@@ -28,194 +28,251 @@ namespace internal {
// Make sure this is only available when targeting a GPU: we don't want to
// introduce conflicts between these packet_traits definitions and the ones
// we'll use on the host side (SSE, AVX, ...)
-//#if defined(__SYCL_DEVICE_ONLY__) && defined(EIGEN_USE_SYCL)
-#define SYCL_PLOG(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type plog<packet_type>(const packet_type& a) { return cl::sycl::log(a); }
+#if defined(SYCL_DEVICE_ONLY)
+#define SYCL_PLOG(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type plog<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::log(a); \
+ }
SYCL_PLOG(cl::sycl::cl_float4)
SYCL_PLOG(cl::sycl::cl_double2)
#undef SYCL_PLOG
-#define SYCL_PLOG1P(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type plog1p<packet_type>(const packet_type& a) { return cl::sycl::log1p(a); }
+#define SYCL_PLOG1P(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type plog1p<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::log1p(a); \
+ }
SYCL_PLOG1P(cl::sycl::cl_float4)
SYCL_PLOG1P(cl::sycl::cl_double2)
#undef SYCL_PLOG1P
-#define SYCL_PLOG10(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type plog10<packet_type>(const packet_type& a) { return cl::sycl::log10(a); }
+#define SYCL_PLOG10(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type plog10<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::log10(a); \
+ }
SYCL_PLOG10(cl::sycl::cl_float4)
SYCL_PLOG10(cl::sycl::cl_double2)
#undef SYCL_PLOG10
-#define SYCL_PEXP(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type pexp<packet_type>(const packet_type& a) { return cl::sycl::exp(a); }
+#define SYCL_PEXP(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pexp<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::exp(a); \
+ }
SYCL_PEXP(cl::sycl::cl_float4)
SYCL_PEXP(cl::sycl::cl_double2)
#undef SYCL_PEXP
-#define SYCL_PEXPM1(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type pexpm1<packet_type>(const packet_type& a) { return cl::sycl::expm1(a); }
+#define SYCL_PEXPM1(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pexpm1<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::expm1(a); \
+ }
SYCL_PEXPM1(cl::sycl::cl_float4)
SYCL_PEXPM1(cl::sycl::cl_double2)
#undef SYCL_PEXPM1
-#define SYCL_PSQRT(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type psqrt<packet_type>(const packet_type& a) { return cl::sycl::sqrt(a); }
+#define SYCL_PSQRT(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type psqrt<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::sqrt(a); \
+ }
SYCL_PSQRT(cl::sycl::cl_float4)
SYCL_PSQRT(cl::sycl::cl_double2)
#undef SYCL_PSQRT
-
-#define SYCL_PRSQRT(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type prsqrt<packet_type>(const packet_type& a) { return cl::sycl::rsqrt(a); }
+#define SYCL_PRSQRT(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type prsqrt<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::rsqrt(a); \
+ }
SYCL_PRSQRT(cl::sycl::cl_float4)
SYCL_PRSQRT(cl::sycl::cl_double2)
#undef SYCL_PRSQRT
-
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
-#define SYCL_PSIN(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type psin<packet_type>(const packet_type& a) { return cl::sycl::sin(a); }
+#define SYCL_PSIN(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type psin<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::sin(a); \
+ }
SYCL_PSIN(cl::sycl::cl_float4)
SYCL_PSIN(cl::sycl::cl_double2)
#undef SYCL_PSIN
-
/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
-#define SYCL_PCOS(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type pcos<packet_type>(const packet_type& a) { return cl::sycl::cos(a); }
+#define SYCL_PCOS(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pcos<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::cos(a); \
+ }
SYCL_PCOS(cl::sycl::cl_float4)
SYCL_PCOS(cl::sycl::cl_double2)
#undef SYCL_PCOS
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
-#define SYCL_PTAN(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type ptan<packet_type>(const packet_type& a) { return cl::sycl::tan(a); }
+#define SYCL_PTAN(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ptan<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::tan(a); \
+ }
SYCL_PTAN(cl::sycl::cl_float4)
SYCL_PTAN(cl::sycl::cl_double2)
#undef SYCL_PTAN
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
-#define SYCL_PASIN(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type pasin<packet_type>(const packet_type& a) { return cl::sycl::asin(a); }
+#define SYCL_PASIN(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pasin<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::asin(a); \
+ }
SYCL_PASIN(cl::sycl::cl_float4)
SYCL_PASIN(cl::sycl::cl_double2)
#undef SYCL_PASIN
-
/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
-#define SYCL_PACOS(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type pacos<packet_type>(const packet_type& a) { return cl::sycl::acos(a); }
+#define SYCL_PACOS(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pacos<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::acos(a); \
+ }
SYCL_PACOS(cl::sycl::cl_float4)
SYCL_PACOS(cl::sycl::cl_double2)
#undef SYCL_PACOS
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
-#define SYCL_PATAN(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type patan<packet_type>(const packet_type& a) { return cl::sycl::atan(a); }
+#define SYCL_PATAN(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type patan<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::atan(a); \
+ }
SYCL_PATAN(cl::sycl::cl_float4)
SYCL_PATAN(cl::sycl::cl_double2)
#undef SYCL_PATAN
/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
-#define SYCL_PSINH(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type psinh<packet_type>(const packet_type& a) { return cl::sycl::sinh(a); }
+#define SYCL_PSINH(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type psinh<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::sinh(a); \
+ }
SYCL_PSINH(cl::sycl::cl_float4)
SYCL_PSINH(cl::sycl::cl_double2)
#undef SYCL_PSINH
/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
-#define SYCL_PCOSH(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type pcosh<packet_type>(const packet_type& a) { return cl::sycl::cosh(a); }
+#define SYCL_PCOSH(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pcosh<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::cosh(a); \
+ }
SYCL_PCOSH(cl::sycl::cl_float4)
SYCL_PCOSH(cl::sycl::cl_double2)
#undef SYCL_PCOSH
/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
-#define SYCL_PTANH(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type ptanh<packet_type>(const packet_type& a) { return cl::sycl::tanh(a); }
+#define SYCL_PTANH(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ptanh<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::tanh(a); \
+ }
SYCL_PTANH(cl::sycl::cl_float4)
SYCL_PTANH(cl::sycl::cl_double2)
#undef SYCL_PTANH
-#define SYCL_PCEIL(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type pceil<packet_type>(const packet_type& a) { return cl::sycl::ceil(a); }
+#define SYCL_PCEIL(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pceil<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::ceil(a); \
+ }
SYCL_PCEIL(cl::sycl::cl_float4)
SYCL_PCEIL(cl::sycl::cl_double2)
#undef SYCL_PCEIL
-
-#define SYCL_PROUND(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type pround<packet_type>(const packet_type& a) { return cl::sycl::round(a); }
+#define SYCL_PROUND(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pround<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::round(a); \
+ }
SYCL_PROUND(cl::sycl::cl_float4)
SYCL_PROUND(cl::sycl::cl_double2)
#undef SYCL_PROUND
-#define SYCL_FLOOR(packet_type) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type pfloor<packet_type>(const packet_type& a) { return cl::sycl::floor(a); }
+#define SYCL_FLOOR(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pfloor<packet_type>( \
+ const packet_type& a) { \
+ return cl::sycl::floor(a); \
+ }
SYCL_FLOOR(cl::sycl::cl_float4)
SYCL_FLOOR(cl::sycl::cl_double2)
#undef SYCL_FLOOR
-
-#define SYCL_PMIN(packet_type, expr) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type pmin<packet_type>(const packet_type& a, const packet_type& b) { return expr; }
+#define SYCL_PMIN(packet_type, expr) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pmin<packet_type>( \
+ const packet_type& a, const packet_type& b) { \
+ return expr; \
+ }
SYCL_PMIN(cl::sycl::cl_float4, cl::sycl::fmin(a, b))
SYCL_PMIN(cl::sycl::cl_double2, cl::sycl::fmin(a, b))
#undef SYCL_PMIN
-#define SYCL_PMAX(packet_type, expr) \
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
-packet_type pmax<packet_type>(const packet_type& a, const packet_type& b) { return expr; }
+#define SYCL_PMAX(packet_type, expr) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type pmax<packet_type>( \
+ const packet_type& a, const packet_type& b) { \
+ return expr; \
+ }
SYCL_PMAX(cl::sycl::cl_float4, cl::sycl::fmax(a, b))
SYCL_PMAX(cl::sycl::cl_double2, cl::sycl::fmax(a, b))
#undef SYCL_PMAX
-//#endif
+#endif
-} // end namespace internal
+} // end namespace internal
-} // end namespace Eigen
+} // end namespace Eigen
-#endif // EIGEN_MATH_FUNCTIONS_CUDA_H
+#endif // EIGEN_MATH_FUNCTIONS_SYCL_H
diff --git a/Eigen/src/Core/arch/SYCL/PacketMath.h b/Eigen/src/Core/arch/SYCL/PacketMath.h
index 820a83311..a9adb64ba 100644
--- a/Eigen/src/Core/arch/SYCL/PacketMath.h
+++ b/Eigen/src/Core/arch/SYCL/PacketMath.h
@@ -16,85 +16,122 @@
* \brief:
* PacketMath
*
-*****************************************************************/
+ *****************************************************************/
#ifndef EIGEN_PACKET_MATH_SYCL_H
#define EIGEN_PACKET_MATH_SYCL_H
#include <type_traits>
-#if defined EIGEN_USE_SYCL
namespace Eigen {
namespace internal {
-
-#define SYCL_PLOADT_RO(address_space_target)\
-template<typename packet_type, int Alignment>\
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\
- ploadt_ro(typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
- cl::sycl::access::address_space::address_space_target>::pointer_t from) {\
- typedef typename unpacket_traits<packet_type>::type scalar;\
- typedef cl::sycl::multi_ptr<scalar, cl::sycl::access::address_space::address_space_target> multi_ptr;\
- auto res=packet_type(static_cast<typename unpacket_traits<packet_type>::type>(0));\
- res.load(0, multi_ptr(const_cast<typename multi_ptr::pointer_t>(from)));\
- return res;\
-}
+#ifdef SYCL_DEVICE_ONLY
+
+#define SYCL_PLOADT_RO(address_space_target) \
+ template <typename packet_type, int Alignment> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt_ro( \
+ typename cl::sycl::multi_ptr< \
+ const typename unpacket_traits<packet_type>::type, \
+ cl::sycl::access::address_space::address_space_target>::pointer_t \
+ from) { \
+ typedef typename unpacket_traits<packet_type>::type scalar; \
+ typedef cl::sycl::multi_ptr< \
+ scalar, cl::sycl::access::address_space::address_space_target> \
+ multi_ptr; \
+ auto res = packet_type( \
+ static_cast<typename unpacket_traits<packet_type>::type>(0)); \
+ res.load(0, multi_ptr(const_cast<typename multi_ptr::pointer_t>(from))); \
+ return res; \
+ }
SYCL_PLOADT_RO(global_space)
SYCL_PLOADT_RO(local_space)
-
#undef SYCL_PLOADT_RO
+#endif
+template <typename packet_type, int Alignment, typename T>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type
+ploadt_ro(const Eigen::TensorSycl::internal::RangeAccess<
+ cl::sycl::access::mode::read_write, T>& from) {
+ return ploadt_ro<packet_type, Alignment>(from.get_pointer());
+}
-#define SYCL_PLOAD(address_space_target, Alignment, AlignedType)\
-template<typename packet_type> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\
- pload##AlignedType(typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
- cl::sycl::access::address_space::address_space_target>::pointer_t from) {\
- return ploadt_ro<packet_type, Alignment>(from);\
- }
+#ifdef SYCL_DEVICE_ONLY
+#define SYCL_PLOAD(address_space_target, Alignment, AlignedType) \
+ template <typename packet_type> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \
+ typename cl::sycl::multi_ptr< \
+ const typename unpacket_traits<packet_type>::type, \
+ cl::sycl::access::address_space::address_space_target>::pointer_t \
+ from) { \
+ return ploadt_ro<packet_type, Alignment>(from); \
+ }
// global space
SYCL_PLOAD(global_space, Unaligned, u)
SYCL_PLOAD(global_space, Aligned, )
-
// local space
SYCL_PLOAD(local_space, Unaligned, u)
SYCL_PLOAD(local_space, Aligned, )
-// private space
-//SYCL_PLOAD(private_space, Unaligned, u)
-//SYCL_PLOAD(private_space, Aligned, )
-
+#undef SYCL_PLOAD
+#endif
+
+#define SYCL_PLOAD(Alignment, AlignedType) \
+ template <typename packet_type> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \
+ const Eigen::TensorSycl::internal::RangeAccess< \
+ cl::sycl::access::mode::read_write, \
+ typename unpacket_traits<packet_type>::type> \
+ from) { \
+ return ploadt_ro<packet_type, Alignment>(from); \
+ }
+SYCL_PLOAD(Unaligned, u)
+SYCL_PLOAD(Aligned, )
#undef SYCL_PLOAD
-
+#ifdef SYCL_DEVICE_ONLY
/** \internal \returns a packet version of \a *from.
- * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
-#define SYCL_PLOADT(address_space_target)\
-template<typename packet_type, int Alignment>\
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt(\
- typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
- cl::sycl::access::address_space::address_space_target>::pointer_t from)\
-{\
- if(Alignment >= unpacket_traits<packet_type>::alignment)\
- return pload<packet_type>(from);\
- else\
- return ploadu<packet_type>(from);\
-}
+ * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
+#define SYCL_PLOADT(address_space_target) \
+ template <typename packet_type, int Alignment> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt( \
+ typename cl::sycl::multi_ptr< \
+ const typename unpacket_traits<packet_type>::type, \
+ cl::sycl::access::address_space::address_space_target>::pointer_t \
+ from) { \
+ if (Alignment >= unpacket_traits<packet_type>::alignment) \
+ return pload<packet_type>(from); \
+ else \
+ return ploadu<packet_type>(from); \
+ }
// global space
SYCL_PLOADT(global_space)
// local space
SYCL_PLOADT(local_space)
+#undef SYCL_PLOADT
+#endif
+
+template <typename packet_type, int Alignment>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type
+ploadt(const Eigen::TensorSycl::internal::RangeAccess<
+ cl::sycl::access::mode::read_write,
+ typename unpacket_traits<packet_type>::type>& from) {
+ return ploadt<packet_type, Alignment>(from.get_pointer());
+}
+#ifdef SYCL_DEVICE_ONLY
-//private_space
-// There is no need to specialise it for private space as it can use the GenericPacketMath version
-
-#define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment)\
- template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\
- ploadt_ro<packet_type, Alignment>(const typename unpacket_traits<packet_type>::type * from) { \
- typedef typename unpacket_traits<packet_type>::type scalar;\
- auto res=packet_type(static_cast<scalar>(0));\
- res. template load<cl::sycl::access::address_space::private_space>(0, const_cast<scalar*>(from));\
- return res;\
+// private_space
+#define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type \
+ ploadt_ro<packet_type, Alignment>( \
+ const typename unpacket_traits<packet_type>::type* from) { \
+ typedef typename unpacket_traits<packet_type>::type scalar; \
+ auto res = packet_type(static_cast<scalar>(0)); \
+ res.template load<cl::sycl::access::address_space::private_space>( \
+ 0, const_cast<scalar*>(from)); \
+ return res; \
}
SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Aligned)
@@ -102,37 +139,42 @@ SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Aligned)
SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Unaligned)
SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Unaligned)
-
-#define SYCL_PLOAD_SPECIAL(packet_type, alignment_type)\
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\
- pload##alignment_type(const typename unpacket_traits<packet_type>::type * from) { \
- typedef typename unpacket_traits<packet_type>::type scalar;\
- auto res=packet_type(static_cast<scalar>(0));\
- res. template load<cl::sycl::access::address_space::private_space>(0, const_cast<scalar*>(from));\
- return res;\
+#define SYCL_PLOAD_SPECIAL(packet_type, alignment_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##alignment_type( \
+ const typename unpacket_traits<packet_type>::type* from) { \
+ typedef typename unpacket_traits<packet_type>::type scalar; \
+ auto res = packet_type(static_cast<scalar>(0)); \
+ res.template load<cl::sycl::access::address_space::private_space>( \
+ 0, const_cast<scalar*>(from)); \
+ return res; \
}
-SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4,)
-SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2,)
+SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, )
+SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, )
SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, u)
SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, u)
#undef SYCL_PLOAD_SPECIAL
-#define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment)\
-template<>\
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
- typename cl::sycl::multi_ptr<scalar, cl::sycl::access::address_space::address_space_target>::pointer_t to, \
- const packet_type& from) {\
- typedef cl::sycl::multi_ptr<scalar, cl::sycl::access::address_space::address_space_target> multi_ptr;\
- from.store(0, multi_ptr(to));\
-}
+#define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
+ typename cl::sycl::multi_ptr< \
+ scalar, \
+ cl::sycl::access::address_space::address_space_target>::pointer_t \
+ to, \
+ const packet_type& from) { \
+ typedef cl::sycl::multi_ptr< \
+ scalar, cl::sycl::access::address_space::address_space_target> \
+ multi_ptr; \
+ from.store(0, multi_ptr(to)); \
+ }
// global space
SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, )
SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, u)
SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, )
SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, u)
-
SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, )
SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, u)
SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, )
@@ -142,36 +184,34 @@ SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, )
SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, u)
SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, )
SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, u)
+#undef SYCL_PSTORE
+
+#define SYCL_PSTORE_T(address_space_target) \
+ template <typename scalar, typename packet_type, int Alignment> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret( \
+ typename cl::sycl::multi_ptr< \
+ scalar, \
+ cl::sycl::access::address_space::address_space_target>::pointer_t \
+ to, \
+ const packet_type& from) { \
+ if (Alignment) \
+ pstore(to, from); \
+ else \
+ pstoreu(to, from); \
+ }
+SYCL_PSTORE_T(global_space)
-#define SYCL_PSTORE_T(scalar, packet_type, Alignment)\
-template<>\
-EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret<scalar, packet_type, Alignment>(\
- scalar* to,\
- const packet_type& from) {\
- if(Alignment)\
- pstore(to, from);\
- else\
- pstoreu(to,from);\
-}
-
-
-SYCL_PSTORE_T(float, cl::sycl::cl_float4, Aligned)
-
-SYCL_PSTORE_T(float, cl::sycl::cl_float4, Unaligned)
-
-SYCL_PSTORE_T(double, cl::sycl::cl_double2, Aligned)
-
-SYCL_PSTORE_T(double, cl::sycl::cl_double2, Unaligned)
-
+SYCL_PSTORE_T(local_space)
#undef SYCL_PSTORE_T
-#define SYCL_PSET1(packet_type)\
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>(\
- const typename unpacket_traits<packet_type>::type& from) {\
- return packet_type(from);\
-}
+#define SYCL_PSET1(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>( \
+ const typename unpacket_traits<packet_type>::type& from) { \
+ return packet_type(from); \
+ }
// global space
SYCL_PSET1(cl::sycl::cl_float4)
@@ -179,280 +219,343 @@ SYCL_PSET1(cl::sycl::cl_double2)
#undef SYCL_PSET1
-
-template <typename packet_type> struct get_base_packet {
-template <typename sycl_multi_pointer>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_ploaddup(sycl_multi_pointer ) {}
+template <typename packet_type>
+struct get_base_packet {
+ template <typename sycl_multi_pointer>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
+ get_ploaddup(sycl_multi_pointer) {}
template <typename sycl_multi_pointer>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_pgather(sycl_multi_pointer , Index ) {}
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
+ get_pgather(sycl_multi_pointer, Index) {}
};
-template <> struct get_base_packet <cl::sycl::cl_float4> {
+template <>
+struct get_base_packet<cl::sycl::cl_float4> {
template <typename sycl_multi_pointer>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(sycl_multi_pointer from) {
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(
+ sycl_multi_pointer from) {
return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);
}
template <typename sycl_multi_pointer>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(sycl_multi_pointer from, Index stride) {
- return cl::sycl::cl_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(
+ sycl_multi_pointer from, Index stride) {
+ return cl::sycl::cl_float4(from[0 * stride], from[1 * stride],
+ from[2 * stride], from[3 * stride]);
}
template <typename sycl_multi_pointer>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to , const cl::sycl::cl_float4& from, Index stride) {
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
+ sycl_multi_pointer to, const cl::sycl::cl_float4& from, Index stride) {
auto tmp = stride;
to[0] = from.x();
to[tmp] = from.y();
to[tmp += stride] = from.z();
to[tmp += stride] = from.w();
- }
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(const float& a) {
- return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a+1), static_cast<float>(a+2), static_cast<float>(a+3));
- }
+ }
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(
+ const float& a) {
+ return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a + 1),
+ static_cast<float>(a + 2),
+ static_cast<float>(a + 3));
+ }
};
-template <> struct get_base_packet <cl::sycl::cl_double2> {
+template <>
+struct get_base_packet<cl::sycl::cl_double2> {
template <typename sycl_multi_pointer>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_ploaddup(const sycl_multi_pointer from) {
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2
+ get_ploaddup(const sycl_multi_pointer from) {
return cl::sycl::cl_double2(from[0], from[0]);
}
template <typename sycl_multi_pointer, typename Index>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(const sycl_multi_pointer from, Index stride) {
- return cl::sycl::cl_double2(from[0*stride], from[1*stride]);
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(
+ const sycl_multi_pointer from, Index stride) {
+ return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]);
}
template <typename sycl_multi_pointer>
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to , const cl::sycl::cl_double2& from, Index stride) {
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
+ sycl_multi_pointer to, const cl::sycl::cl_double2& from, Index stride) {
to[0] = from.x();
to[stride] = from.y();
}
- static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(const double& a) {
- return cl::sycl::cl_double2(static_cast<double>(a), static_cast<double>(a + 1));
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(
+ const double& a) {
+ return cl::sycl::cl_double2(static_cast<double>(a),
+ static_cast<double>(a + 1));
}
};
-#define SYCL_PLOAD_DUP(address_space_target)\
-template<typename packet_type> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
-ploaddup(typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
- cl::sycl::access::address_space::address_space_target>::pointer_t from)\
-{\
- return get_base_packet<packet_type>::get_ploaddup(from); \
-}
+#define SYCL_PLOAD_DUP(address_space_target) \
+ template <typename packet_type> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup( \
+ typename cl::sycl::multi_ptr< \
+ const typename unpacket_traits<packet_type>::type, \
+ cl::sycl::access::address_space::address_space_target>::pointer_t \
+ from) { \
+ return get_base_packet<packet_type>::get_ploaddup(from); \
+ }
// global space
SYCL_PLOAD_DUP(global_space)
// local_space
SYCL_PLOAD_DUP(local_space)
-// private_space
-//SYCL_PLOAD_DUP(private_space)
#undef SYCL_PLOAD_DUP
-#define SYCL_PLOAD_DUP_SPECILIZE(packet_type)\
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
-ploaddup<packet_type>(const typename unpacket_traits<packet_type>::type * from)\
-{ \
- return get_base_packet<packet_type>::get_ploaddup(from); \
-}
+#define SYCL_PLOAD_DUP_SPECILIZE(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup<packet_type>( \
+ const typename unpacket_traits<packet_type>::type* from) { \
+ return get_base_packet<packet_type>::get_ploaddup(from); \
+ }
SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)
SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
#undef SYCL_PLOAD_DUP_SPECILIZE
-#define SYCL_PLSET(packet_type)\
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>(const typename unpacket_traits<packet_type>::type& a) {\
- return get_base_packet<packet_type>::set_plset(a);\
-}
+#define SYCL_PLSET(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>( \
+ const typename unpacket_traits<packet_type>::type& a) { \
+ return get_base_packet<packet_type>::set_plset(a); \
+ }
SYCL_PLSET(cl::sycl::cl_float4)
SYCL_PLSET(cl::sycl::cl_double2)
#undef SYCL_PLSET
-
-#define SYCL_PGATHER(address_space_target)\
-template<typename Scalar, typename packet_type> EIGEN_DEVICE_FUNC inline packet_type pgather(\
- typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
- cl::sycl::access::address_space::address_space_target>::pointer_t from, Index stride) {\
- return get_base_packet<packet_type>::get_pgather(from, stride); \
-}
+#define SYCL_PGATHER(address_space_target) \
+ template <typename Scalar, typename packet_type> \
+ EIGEN_DEVICE_FUNC inline packet_type pgather( \
+ typename cl::sycl::multi_ptr< \
+ const typename unpacket_traits<packet_type>::type, \
+ cl::sycl::access::address_space::address_space_target>::pointer_t \
+ from, \
+ Index stride) { \
+ return get_base_packet<packet_type>::get_pgather(from, stride); \
+ }
// global space
SYCL_PGATHER(global_space)
// local space
SYCL_PGATHER(local_space)
-// private space
-//SYCL_PGATHER(private_space)
#undef SYCL_PGATHER
-
-#define SYCL_PGATHER_SPECILIZE(scalar, packet_type)\
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
-pgather<scalar, packet_type>(const typename unpacket_traits<packet_type>::type * from, Index stride)\
-{ \
- return get_base_packet<packet_type>::get_pgather(from, stride); \
-}
+#define SYCL_PGATHER_SPECILIZE(scalar, packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
+ pgather<scalar, packet_type>( \
+ const typename unpacket_traits<packet_type>::type* from, Index stride) { \
+ return get_base_packet<packet_type>::get_pgather(from, stride); \
+ }
SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4)
SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2)
#undef SYCL_PGATHER_SPECILIZE
-#define SYCL_PSCATTER(address_space_target)\
-template<typename Scalar, typename packet_type> EIGEN_DEVICE_FUNC inline void pscatter(\
- typename cl::sycl::multi_ptr<typename unpacket_traits<packet_type>::type,\
- cl::sycl::access::address_space::address_space_target>::pointer_t to,\
- const packet_type& from, Index stride) {\
- get_base_packet<packet_type>::set_pscatter(to, from, stride);\
-}
+#define SYCL_PSCATTER(address_space_target) \
+ template <typename Scalar, typename packet_type> \
+ EIGEN_DEVICE_FUNC inline void pscatter( \
+ typename cl::sycl::multi_ptr< \
+ typename unpacket_traits<packet_type>::type, \
+ cl::sycl::access::address_space::address_space_target>::pointer_t \
+ to, \
+ const packet_type& from, Index stride) { \
+ get_base_packet<packet_type>::set_pscatter(to, from, stride); \
+ }
// global space
SYCL_PSCATTER(global_space)
// local space
SYCL_PSCATTER(local_space)
-// private space
-//SYCL_PSCATTER(private_space)
#undef SYCL_PSCATTER
-
-
-#define SYCL_PSCATTER_SPECILIZE(scalar, packet_type)\
-template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void \
-pscatter<scalar, packet_type>(typename unpacket_traits<packet_type>::type * to, const packet_type& from, Index stride)\
-{ \
- get_base_packet<packet_type>::set_pscatter(to, from, stride);\
-}
+#define SYCL_PSCATTER_SPECILIZE(scalar, packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \
+ typename unpacket_traits<packet_type>::type * to, \
+ const packet_type& from, Index stride) { \
+ get_base_packet<packet_type>::set_pscatter(to, from, stride); \
+ }
SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4)
SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2)
#undef SYCL_PSCATTER_SPECILIZE
-
-#define SYCL_PMAD(packet_type)\
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd( const packet_type& a,\
- const packet_type& b, const packet_type& c){\
- return cl::sycl::mad(a,b,c);\
-}
+#define SYCL_PMAD(packet_type) \
+ template <> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd( \
+ const packet_type& a, const packet_type& b, const packet_type& c) { \
+ return cl::sycl::mad(a, b, c); \
+ }
SYCL_PMAD(cl::sycl::cl_float4)
SYCL_PMAD(cl::sycl::cl_double2)
#undef SYCL_PMAD
-
-
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(
+ const cl::sycl::cl_float4& a) {
return a.x();
}
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(
+ const cl::sycl::cl_double2& a) {
return a.x();
}
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(
+ const cl::sycl::cl_float4& a) {
return a.x() + a.y() + a.z() + a.w();
}
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(
+ const cl::sycl::cl_double2& a) {
return a.x() + a.y();
}
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
- return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()), cl::sycl::fmax(a.z(), a.w()));
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(
+ const cl::sycl::cl_float4& a) {
+ return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()),
+ cl::sycl::fmax(a.z(), a.w()));
}
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(
+ const cl::sycl::cl_double2& a) {
return cl::sycl::fmax(a.x(), a.y());
}
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
- return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()), cl::sycl::fmin(a.z(), a.w()));
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(
+ const cl::sycl::cl_float4& a) {
+ return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()),
+ cl::sycl::fmin(a.z(), a.w()));
}
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(
+ const cl::sycl::cl_double2& a) {
return cl::sycl::fmin(a.x(), a.y());
}
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(
+ const cl::sycl::cl_float4& a) {
return a.x() * a.y() * a.z() * a.w();
}
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(
+ const cl::sycl::cl_double2& a) {
return a.x() * a.y();
}
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
- return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()), cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w()));
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
+pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
+ return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()),
+ cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w()));
}
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2
+pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));
}
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void
-ptranspose(PacketBlock<cl::sycl::cl_float4,4>& kernel) {
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(
+ PacketBlock<cl::sycl::cl_float4, 4>& kernel) {
float tmp = kernel.packet[0].y();
kernel.packet[0].y() = kernel.packet[1].x();
kernel.packet[1].x() = tmp;
-// std::swap(kernel.packet[0].y(), kernel.packet[1].x());
tmp = kernel.packet[0].z();
kernel.packet[0].z() = kernel.packet[2].x();
kernel.packet[2].x() = tmp;
- //std::swap(kernel.packet[0].z(), kernel.packet[2].x());
tmp = kernel.packet[0].w();
kernel.packet[0].w() = kernel.packet[3].x();
kernel.packet[3].x() = tmp;
- //std::swap(kernel.packet[0].w(), kernel.packet[3].x());
-
tmp = kernel.packet[1].z();
kernel.packet[1].z() = kernel.packet[2].y();
kernel.packet[2].y() = tmp;
-// std::swap(kernel.packet[1].z(), kernel.packet[2].y());
tmp = kernel.packet[1].w();
kernel.packet[1].w() = kernel.packet[3].y();
kernel.packet[3].y() = tmp;
-// std::swap(kernel.packet[1].w(), kernel.packet[3].y());
tmp = kernel.packet[2].w();
kernel.packet[2].w() = kernel.packet[3].z();
kernel.packet[3].z() = tmp;
-// std::swap(kernel.packet[2].w(), kernel.packet[3].z());
-
}
- EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void
-ptranspose(PacketBlock<cl::sycl::cl_double2,2>& kernel) {
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose(
+ PacketBlock<cl::sycl::cl_double2, 2>& kernel) {
double tmp = kernel.packet[0].y();
kernel.packet[0].y() = kernel.packet[1].x();
kernel.packet[1].x() = tmp;
-//std::swap(kernel.packet[0].y(), kernel.packet[1].x());
}
-
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
-pblend(const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket,
- const cl::sycl::cl_float4& thenPacket, const cl::sycl::cl_float4& elsePacket) {
- cl::sycl::cl_int4 condition(ifPacket.select[0] ? 0 : -1,
- ifPacket.select[1] ? 0 : -1,
- ifPacket.select[2] ? 0 : -1,
- ifPacket.select[3] ? 0 : -1);
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend(
+ const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket,
+ const cl::sycl::cl_float4& thenPacket,
+ const cl::sycl::cl_float4& elsePacket) {
+ cl::sycl::cl_int4 condition(
+ ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1,
+ ifPacket.select[2] ? 0 : -1, ifPacket.select[3] ? 0 : -1);
return cl::sycl::select(thenPacket, elsePacket, condition);
}
-template<> inline cl::sycl::cl_double2
-pblend(const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
- const cl::sycl::cl_double2& thenPacket, const cl::sycl::cl_double2& elsePacket) {
+template <>
+inline cl::sycl::cl_double2 pblend(
+ const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
+ const cl::sycl::cl_double2& thenPacket,
+ const cl::sycl::cl_double2& elsePacket) {
cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1,
ifPacket.select[1] ? 0 : -1);
return cl::sycl::select(thenPacket, elsePacket, condition);
}
+#endif // SYCL_DEVICE_ONLY
+
+#define SYCL_PSTORE(alignment) \
+ template <typename packet_type> \
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
+ const Eigen::TensorSycl::internal::RangeAccess< \
+ cl::sycl::access::mode::read_write, \
+ typename unpacket_traits<packet_type>::type>& to, \
+ const packet_type& from) { \
+ pstore##alignment(to.get_pointer(), from); \
+ }
+
+// global space
+SYCL_PSTORE()
+SYCL_PSTORE(u)
+
+#undef SYCL_PSTORE
+
+template <typename scalar, typename packet_type, int Alignment>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(
+ Eigen::TensorSycl::internal::RangeAccess<
+ cl::sycl::access::mode::read_write,
+ typename unpacket_traits<packet_type>::type>
+ to,
+ const packet_type& from) {
+ pstoret<scalar, packet_type, Alignment>(to.get_pointer(), from);
+}
-} // end namespace internal
+} // end namespace internal
-} // end namespace Eigen
+} // end namespace Eigen
-#endif // EIGEN_USE_SYCL
-#endif // EIGEN_PACKET_MATH_SYCL_H
+#endif // EIGEN_PACKET_MATH_SYCL_H
diff --git a/Eigen/src/Core/arch/SYCL/TypeCasting.h b/Eigen/src/Core/arch/SYCL/TypeCasting.h
index dedd5c84a..9208ab21d 100644
--- a/Eigen/src/Core/arch/SYCL/TypeCasting.h
+++ b/Eigen/src/Core/arch/SYCL/TypeCasting.h
@@ -16,7 +16,7 @@
* \brief:
* TypeCasting
*
-*****************************************************************/
+ *****************************************************************/
#ifndef EIGEN_TYPE_CASTING_SYCL_H
#define EIGEN_TYPE_CASTING_SYCL_H
@@ -24,66 +24,62 @@
namespace Eigen {
namespace internal {
-#ifdef __SYCL_DEVICE_ONLY__
+#ifdef SYCL_DEVICE_ONLY
template <>
struct type_casting_traits<float, int> {
- enum {
- VectorizedCast = 1,
- SrcCoeffRatio = 1,
- TgtCoeffRatio = 1
- };
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
};
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_int4 pcast<cl::sycl::cl_float4, cl::sycl::cl_int4>(const cl::sycl::cl_float4& a) {
- return a. template convert<cl::sycl::cl_int, cl::sycl::rounding_mode::automatic>();
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_int4
+pcast<cl::sycl::cl_float4, cl::sycl::cl_int4>(const cl::sycl::cl_float4& a) {
+ return a
+ .template convert<cl::sycl::cl_int, cl::sycl::rounding_mode::automatic>();
}
-
template <>
struct type_casting_traits<int, float> {
- enum {
- VectorizedCast = 1,
- SrcCoeffRatio = 1,
- TgtCoeffRatio = 1
- };
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 1 };
};
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pcast<cl::sycl::cl_int4, cl::sycl::cl_float4>(const cl::sycl::cl_int4& a) {
- return a. template convert<cl::sycl::cl_float, cl::sycl::rounding_mode::automatic>();
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
+pcast<cl::sycl::cl_int4, cl::sycl::cl_float4>(const cl::sycl::cl_int4& a) {
+ return a.template convert<cl::sycl::cl_float,
+ cl::sycl::rounding_mode::automatic>();
}
template <>
struct type_casting_traits<double, float> {
- enum {
- VectorizedCast = 1,
- SrcCoeffRatio = 2,
- TgtCoeffRatio = 1
- };
+ enum { VectorizedCast = 1, SrcCoeffRatio = 2, TgtCoeffRatio = 1 };
};
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pcast<cl::sycl::cl_double2, cl::sycl::cl_float4>(const cl::sycl::cl_double2& a, const cl::sycl::cl_double2& b) {
- auto a1=a. template convert<cl::sycl::cl_float, cl::sycl::rounding_mode::automatic>();
- auto b1=b. template convert<cl::sycl::cl_float, cl::sycl::rounding_mode::automatic>();
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
+pcast<cl::sycl::cl_double2, cl::sycl::cl_float4>(
+ const cl::sycl::cl_double2& a, const cl::sycl::cl_double2& b) {
+ auto a1 = a.template convert<cl::sycl::cl_float,
+ cl::sycl::rounding_mode::automatic>();
+ auto b1 = b.template convert<cl::sycl::cl_float,
+ cl::sycl::rounding_mode::automatic>();
return cl::sycl::float4(a1.x(), a1.y(), b1.x(), b1.y());
}
template <>
struct type_casting_traits<float, double> {
- enum {
- VectorizedCast = 1,
- SrcCoeffRatio = 1,
- TgtCoeffRatio = 2
- };
+ enum { VectorizedCast = 1, SrcCoeffRatio = 1, TgtCoeffRatio = 2 };
};
-template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pcast<cl::sycl::cl_float4, cl::sycl::cl_double2>(const cl::sycl::cl_float4& a) {
+template <>
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2
+pcast<cl::sycl::cl_float4, cl::sycl::cl_double2>(const cl::sycl::cl_float4& a) {
// Simply discard the second half of the input
return cl::sycl::cl_double2(a.x(), a.y());
}
#endif
-} // end namespace internal
+} // end namespace internal
-} // end namespace Eigen
+} // end namespace Eigen
-#endif // EIGEN_TYPE_CASTING_SYCL_H
+#endif // EIGEN_TYPE_CASTING_SYCL_H