aboutsummaryrefslogtreecommitdiffhomepage
path: root/Eigen/src/Core/arch/SYCL
diff options
context:
space:
mode:
authorGravatar Mehdi Goli <mehdi.goli@codeplay.com>2018-08-01 12:40:54 +0100
committerGravatar Mehdi Goli <mehdi.goli@codeplay.com>2018-08-01 12:40:54 +0100
commitc84509d7cc5fa3e032da8cfdcd5e82b2897cc5d9 (patch)
tree80e9c767dd62de069d95ebb4b5578e0426635cc4 /Eigen/src/Core/arch/SYCL
parentedf46bd7a27ef1088efc2116196c088d59d22b4a (diff)
Adding new arch/SYCL headers, used for SYCL vectorization.
Diffstat (limited to 'Eigen/src/Core/arch/SYCL')
-rw-r--r--Eigen/src/Core/arch/SYCL/InteropHeaders.h104
-rw-r--r--Eigen/src/Core/arch/SYCL/MathFunctions.h221
-rw-r--r--Eigen/src/Core/arch/SYCL/PacketMath.h458
-rw-r--r--Eigen/src/Core/arch/SYCL/TypeCasting.h89
4 files changed, 872 insertions, 0 deletions
diff --git a/Eigen/src/Core/arch/SYCL/InteropHeaders.h b/Eigen/src/Core/arch/SYCL/InteropHeaders.h
new file mode 100644
index 000000000..c1da40d14
--- /dev/null
+++ b/Eigen/src/Core/arch/SYCL/InteropHeaders.h
@@ -0,0 +1,104 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * InteropHeaders.h
+ *
+ * \brief:
+ * InteropHeaders
+ *
+*****************************************************************/
+
+#ifndef EIGEN_INTEROP_HEADERS_SYCL_H
+#define EIGEN_INTEROP_HEADERS_SYCL_H
+#if defined EIGEN_USE_SYCL
+namespace Eigen {
+
+namespace internal {
+#define SYCL_PACKET_TRAITS(packet_type, val, unpacket_type, lengths)\
+ template<> struct packet_traits<unpacket_type> : default_packet_traits\
+ {\
+ typedef packet_type type;\
+ typedef packet_type half;\
+ enum {\
+ Vectorizable = 1,\
+ AlignedOnScalar = 1,\
+ size=lengths,\
+ HasHalfPacket = 0,\
+ HasDiv = 1,\
+ HasLog = 1,\
+ HasExp = 1,\
+ HasSqrt = 1,\
+ HasRsqrt = 1,\
+ HasSin = 1,\
+ HasCos = 1,\
+ HasTan = 1,\
+ HasASin = 1,\
+ HasACos = 1,\
+ HasATan = 1,\
+ HasSinh = 1,\
+ HasCosh = 1,\
+ HasTanh = 1,\
+ HasLGamma = 0,\
+ HasDiGamma = 0,\
+ HasZeta = 0,\
+ HasPolygamma = 0,\
+ HasErf = 0,\
+ HasErfc = 0,\
+ HasIGamma = 0,\
+ HasIGammac = 0,\
+ HasBetaInc = 0,\
+ HasBlend = val,\
+ HasMax=1,\
+ HasMin=1,\
+ HasMul=1,\
+ HasAdd=1,\
+ HasFloor=1,\
+ HasRound=1,\
+ HasLog1p=1,\
+ HasExpm1=1,\
+ HasCeil=1,\
+ };\
+ };
+
+SYCL_PACKET_TRAITS(cl::sycl::cl_float4, 1, float, 4)
+SYCL_PACKET_TRAITS(cl::sycl::cl_float4, 1, const float, 4)
+SYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, double, 2)
+SYCL_PACKET_TRAITS(cl::sycl::cl_double2, 0, const double, 2)
+#undef SYCL_PACKET_TRAITS
+
+
+// Make sure this is only available when targeting a GPU: we don't want to
+// introduce conflicts between these packet_traits definitions and the ones
+// we'll use on the host side (SSE, AVX, ...)
+#define SYCL_ARITHMETIC(packet_type) template<> struct is_arithmetic<packet_type> { enum { value = true }; };
+SYCL_ARITHMETIC(cl::sycl::cl_float4)
+SYCL_ARITHMETIC(cl::sycl::cl_double2)
+#undef SYCL_ARITHMETIC
+
+#define SYCL_UNPACKET_TRAITS(packet_type, unpacket_type, lengths)\
+template<> struct unpacket_traits<packet_type> {\
+ typedef unpacket_type type;\
+ enum {size=lengths, alignment=Aligned16};\
+ typedef packet_type half;\
+};
+SYCL_UNPACKET_TRAITS(cl::sycl::cl_float4, float, 4)
+SYCL_UNPACKET_TRAITS(cl::sycl::cl_double2, double, 2)
+
+#undef SYCL_UNPACKET_TRAITS
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_USE_SYCL
+#endif // EIGEN_INTEROP_HEADERS_SYCL_H
diff --git a/Eigen/src/Core/arch/SYCL/MathFunctions.h b/Eigen/src/Core/arch/SYCL/MathFunctions.h
new file mode 100644
index 000000000..422839c6c
--- /dev/null
+++ b/Eigen/src/Core/arch/SYCL/MathFunctions.h
@@ -0,0 +1,221 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * MathFunctions.h
+ *
+ * \brief:
+ * MathFunctions
+ *
+*****************************************************************/
+
+#ifndef EIGEN_MATH_FUNCTIONS_SYCL_H
+#define EIGEN_MATH_FUNCTIONS_SYCL_H
+
+namespace Eigen {
+
+namespace internal {
+
+// Make sure this is only available when targeting a GPU: we don't want to
+// introduce conflicts between these packet_traits definitions and the ones
+// we'll use on the host side (SSE, AVX, ...)
+//#if defined(__SYCL_DEVICE_ONLY__) && defined(EIGEN_USE_SYCL)
+#define SYCL_PLOG(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type plog<packet_type>(const packet_type& a) { return cl::sycl::log(a); }
+
+SYCL_PLOG(cl::sycl::cl_float4)
+SYCL_PLOG(cl::sycl::cl_double2)
+#undef SYCL_PLOG
+
+#define SYCL_PLOG1P(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type plog1p<packet_type>(const packet_type& a) { return cl::sycl::log1p(a); }
+
+SYCL_PLOG1P(cl::sycl::cl_float4)
+SYCL_PLOG1P(cl::sycl::cl_double2)
+#undef SYCL_PLOG1P
+
+#define SYCL_PLOG10(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type plog10<packet_type>(const packet_type& a) { return cl::sycl::log10(a); }
+
+SYCL_PLOG10(cl::sycl::cl_float4)
+SYCL_PLOG10(cl::sycl::cl_double2)
+#undef SYCL_PLOG10
+
+#define SYCL_PEXP(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type pexp<packet_type>(const packet_type& a) { return cl::sycl::exp(a); }
+
+SYCL_PEXP(cl::sycl::cl_float4)
+SYCL_PEXP(cl::sycl::cl_double2)
+#undef SYCL_PEXP
+
+#define SYCL_PEXPM1(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type pexpm1<packet_type>(const packet_type& a) { return cl::sycl::expm1(a); }
+
+SYCL_PEXPM1(cl::sycl::cl_float4)
+SYCL_PEXPM1(cl::sycl::cl_double2)
+#undef SYCL_PEXPM1
+
+#define SYCL_PSQRT(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type psqrt<packet_type>(const packet_type& a) { return cl::sycl::sqrt(a); }
+
+SYCL_PSQRT(cl::sycl::cl_float4)
+SYCL_PSQRT(cl::sycl::cl_double2)
+#undef SYCL_PSQRT
+
+
+#define SYCL_PRSQRT(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type prsqrt<packet_type>(const packet_type& a) { return cl::sycl::rsqrt(a); }
+
+SYCL_PRSQRT(cl::sycl::cl_float4)
+SYCL_PRSQRT(cl::sycl::cl_double2)
+#undef SYCL_PRSQRT
+
+
+/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
+#define SYCL_PSIN(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type psin<packet_type>(const packet_type& a) { return cl::sycl::sin(a); }
+
+SYCL_PSIN(cl::sycl::cl_float4)
+SYCL_PSIN(cl::sycl::cl_double2)
+#undef SYCL_PSIN
+
+
+/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
+#define SYCL_PCOS(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type pcos<packet_type>(const packet_type& a) { return cl::sycl::cos(a); }
+
+SYCL_PCOS(cl::sycl::cl_float4)
+SYCL_PCOS(cl::sycl::cl_double2)
+#undef SYCL_PCOS
+
+/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
+#define SYCL_PTAN(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type ptan<packet_type>(const packet_type& a) { return cl::sycl::tan(a); }
+
+SYCL_PTAN(cl::sycl::cl_float4)
+SYCL_PTAN(cl::sycl::cl_double2)
+#undef SYCL_PTAN
+
+/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
+#define SYCL_PASIN(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type pasin<packet_type>(const packet_type& a) { return cl::sycl::asin(a); }
+
+SYCL_PASIN(cl::sycl::cl_float4)
+SYCL_PASIN(cl::sycl::cl_double2)
+#undef SYCL_PASIN
+
+
+/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
+#define SYCL_PACOS(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type pacos<packet_type>(const packet_type& a) { return cl::sycl::acos(a); }
+
+SYCL_PACOS(cl::sycl::cl_float4)
+SYCL_PACOS(cl::sycl::cl_double2)
+#undef SYCL_PACOS
+
+/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
+#define SYCL_PATAN(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type patan<packet_type>(const packet_type& a) { return cl::sycl::atan(a); }
+
+SYCL_PATAN(cl::sycl::cl_float4)
+SYCL_PATAN(cl::sycl::cl_double2)
+#undef SYCL_PATAN
+
+/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */
+#define SYCL_PSINH(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type psinh<packet_type>(const packet_type& a) { return cl::sycl::sinh(a); }
+
+SYCL_PSINH(cl::sycl::cl_float4)
+SYCL_PSINH(cl::sycl::cl_double2)
+#undef SYCL_PSINH
+
+/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */
+#define SYCL_PCOSH(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type pcosh<packet_type>(const packet_type& a) { return cl::sycl::cosh(a); }
+
+SYCL_PCOSH(cl::sycl::cl_float4)
+SYCL_PCOSH(cl::sycl::cl_double2)
+#undef SYCL_PCOSH
+
+/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */
+#define SYCL_PTANH(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type ptanh<packet_type>(const packet_type& a) { return cl::sycl::tanh(a); }
+
+SYCL_PTANH(cl::sycl::cl_float4)
+SYCL_PTANH(cl::sycl::cl_double2)
+#undef SYCL_PTANH
+
+#define SYCL_PCEIL(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type pceil<packet_type>(const packet_type& a) { return cl::sycl::ceil(a); }
+
+SYCL_PCEIL(cl::sycl::cl_float4)
+SYCL_PCEIL(cl::sycl::cl_double2)
+#undef SYCL_PCEIL
+
+
+#define SYCL_PROUND(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type pround<packet_type>(const packet_type& a) { return cl::sycl::round(a); }
+
+SYCL_PROUND(cl::sycl::cl_float4)
+SYCL_PROUND(cl::sycl::cl_double2)
+#undef SYCL_PROUND
+
+#define SYCL_FLOOR(packet_type) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type pfloor<packet_type>(const packet_type& a) { return cl::sycl::floor(a); }
+
+SYCL_FLOOR(cl::sycl::cl_float4)
+SYCL_FLOOR(cl::sycl::cl_double2)
+#undef SYCL_FLOOR
+
+
+#define SYCL_PMIN(packet_type, expr) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type pmin<packet_type>(const packet_type& a, const packet_type& b) { return expr; }
+
+SYCL_PMIN(cl::sycl::cl_float4, cl::sycl::fmin(a, b))
+SYCL_PMIN(cl::sycl::cl_double2, cl::sycl::fmin(a, b))
+#undef SYCL_PMIN
+
+#define SYCL_PMAX(packet_type, expr) \
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE \
+packet_type pmax<packet_type>(const packet_type& a, const packet_type& b) { return expr; }
+
+SYCL_PMAX(cl::sycl::cl_float4, cl::sycl::fmax(a, b))
+SYCL_PMAX(cl::sycl::cl_double2, cl::sycl::fmax(a, b))
+#undef SYCL_PMAX
+
+//#endif
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_MATH_FUNCTIONS_CUDA_H
diff --git a/Eigen/src/Core/arch/SYCL/PacketMath.h b/Eigen/src/Core/arch/SYCL/PacketMath.h
new file mode 100644
index 000000000..820a83311
--- /dev/null
+++ b/Eigen/src/Core/arch/SYCL/PacketMath.h
@@ -0,0 +1,458 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * PacketMath.h
+ *
+ * \brief:
+ * PacketMath
+ *
+*****************************************************************/
+
+#ifndef EIGEN_PACKET_MATH_SYCL_H
+#define EIGEN_PACKET_MATH_SYCL_H
+#include <type_traits>
+#if defined EIGEN_USE_SYCL
+namespace Eigen {
+
+namespace internal {
+
+#define SYCL_PLOADT_RO(address_space_target)\
+template<typename packet_type, int Alignment>\
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\
+ ploadt_ro(typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
+ cl::sycl::access::address_space::address_space_target>::pointer_t from) {\
+ typedef typename unpacket_traits<packet_type>::type scalar;\
+ typedef cl::sycl::multi_ptr<scalar, cl::sycl::access::address_space::address_space_target> multi_ptr;\
+ auto res=packet_type(static_cast<typename unpacket_traits<packet_type>::type>(0));\
+ res.load(0, multi_ptr(const_cast<typename multi_ptr::pointer_t>(from)));\
+ return res;\
+}
+
+SYCL_PLOADT_RO(global_space)
+SYCL_PLOADT_RO(local_space)
+
+#undef SYCL_PLOADT_RO
+
+
+#define SYCL_PLOAD(address_space_target, Alignment, AlignedType)\
+template<typename packet_type> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\
+ pload##AlignedType(typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
+ cl::sycl::access::address_space::address_space_target>::pointer_t from) {\
+ return ploadt_ro<packet_type, Alignment>(from);\
+ }
+
+// global space
+SYCL_PLOAD(global_space, Unaligned, u)
+SYCL_PLOAD(global_space, Aligned, )
+
+// local space
+SYCL_PLOAD(local_space, Unaligned, u)
+SYCL_PLOAD(local_space, Aligned, )
+
+// private space
+//SYCL_PLOAD(private_space, Unaligned, u)
+//SYCL_PLOAD(private_space, Aligned, )
+
+#undef SYCL_PLOAD
+
+
+/** \internal \returns a packet version of \a *from.
+ * The pointer \a from must be aligned on a \a Alignment bytes boundary. */
+#define SYCL_PLOADT(address_space_target)\
+template<typename packet_type, int Alignment>\
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt(\
+ typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
+ cl::sycl::access::address_space::address_space_target>::pointer_t from)\
+{\
+ if(Alignment >= unpacket_traits<packet_type>::alignment)\
+ return pload<packet_type>(from);\
+ else\
+ return ploadu<packet_type>(from);\
+}
+
+// global space
+SYCL_PLOADT(global_space)
+// local space
+SYCL_PLOADT(local_space)
+
+//private_space
+// There is no need to specialise it for private space as it can use the GenericPacketMath version
+
+#define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment)\
+ template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\
+ ploadt_ro<packet_type, Alignment>(const typename unpacket_traits<packet_type>::type * from) { \
+ typedef typename unpacket_traits<packet_type>::type scalar;\
+ auto res=packet_type(static_cast<scalar>(0));\
+ res. template load<cl::sycl::access::address_space::private_space>(0, const_cast<scalar*>(from));\
+ return res;\
+ }
+
+SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Aligned)
+SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Aligned)
+SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Unaligned)
+SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Unaligned)
+
+
+#define SYCL_PLOAD_SPECIAL(packet_type, alignment_type)\
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type\
+ pload##alignment_type(const typename unpacket_traits<packet_type>::type * from) { \
+ typedef typename unpacket_traits<packet_type>::type scalar;\
+ auto res=packet_type(static_cast<scalar>(0));\
+ res. template load<cl::sycl::access::address_space::private_space>(0, const_cast<scalar*>(from));\
+ return res;\
+ }
+SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4,)
+SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2,)
+SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, u)
+SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, u)
+
+#undef SYCL_PLOAD_SPECIAL
+
+#define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment)\
+template<>\
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
+ typename cl::sycl::multi_ptr<scalar, cl::sycl::access::address_space::address_space_target>::pointer_t to, \
+ const packet_type& from) {\
+ typedef cl::sycl::multi_ptr<scalar, cl::sycl::access::address_space::address_space_target> multi_ptr;\
+ from.store(0, multi_ptr(to));\
+}
+
+// global space
+SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, )
+SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, u)
+SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, )
+SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, u)
+
+SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, )
+SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, u)
+SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, )
+SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, u)
+
+SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, )
+SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, u)
+SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, )
+SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, u)
+
+
+#define SYCL_PSTORE_T(scalar, packet_type, Alignment)\
+template<>\
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret<scalar, packet_type, Alignment>(\
+ scalar* to,\
+ const packet_type& from) {\
+ if(Alignment)\
+ pstore(to, from);\
+ else\
+ pstoreu(to,from);\
+}
+
+
+SYCL_PSTORE_T(float, cl::sycl::cl_float4, Aligned)
+
+SYCL_PSTORE_T(float, cl::sycl::cl_float4, Unaligned)
+
+SYCL_PSTORE_T(double, cl::sycl::cl_double2, Aligned)
+
+SYCL_PSTORE_T(double, cl::sycl::cl_double2, Unaligned)
+
+
+#undef SYCL_PSTORE_T
+
+#define SYCL_PSET1(packet_type)\
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>(\
+ const typename unpacket_traits<packet_type>::type& from) {\
+ return packet_type(from);\
+}
+
+// global space
+SYCL_PSET1(cl::sycl::cl_float4)
+SYCL_PSET1(cl::sycl::cl_double2)
+
+#undef SYCL_PSET1
+
+
+template <typename packet_type> struct get_base_packet {
+template <typename sycl_multi_pointer>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_ploaddup(sycl_multi_pointer ) {}
+
+ template <typename sycl_multi_pointer>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_pgather(sycl_multi_pointer , Index ) {}
+};
+
+template <> struct get_base_packet <cl::sycl::cl_float4> {
+ template <typename sycl_multi_pointer>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(sycl_multi_pointer from) {
+ return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);
+ }
+ template <typename sycl_multi_pointer>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(sycl_multi_pointer from, Index stride) {
+ return cl::sycl::cl_float4(from[0*stride], from[1*stride], from[2*stride], from[3*stride]);
+ }
+
+ template <typename sycl_multi_pointer>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to , const cl::sycl::cl_float4& from, Index stride) {
+ auto tmp = stride;
+ to[0] = from.x();
+ to[tmp] = from.y();
+ to[tmp += stride] = from.z();
+ to[tmp += stride] = from.w();
+ }
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(const float& a) {
+ return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a+1), static_cast<float>(a+2), static_cast<float>(a+3));
+ }
+};
+
+template <> struct get_base_packet <cl::sycl::cl_double2> {
+ template <typename sycl_multi_pointer>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_ploaddup(const sycl_multi_pointer from) {
+ return cl::sycl::cl_double2(from[0], from[0]);
+ }
+
+ template <typename sycl_multi_pointer, typename Index>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(const sycl_multi_pointer from, Index stride) {
+ return cl::sycl::cl_double2(from[0*stride], from[1*stride]);
+ }
+
+ template <typename sycl_multi_pointer>
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(sycl_multi_pointer to , const cl::sycl::cl_double2& from, Index stride) {
+ to[0] = from.x();
+ to[stride] = from.y();
+ }
+
+ static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(const double& a) {
+ return cl::sycl::cl_double2(static_cast<double>(a), static_cast<double>(a + 1));
+ }
+};
+
+#define SYCL_PLOAD_DUP(address_space_target)\
+template<typename packet_type> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
+ploaddup(typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
+ cl::sycl::access::address_space::address_space_target>::pointer_t from)\
+{\
+ return get_base_packet<packet_type>::get_ploaddup(from); \
+}
+
+// global space
+SYCL_PLOAD_DUP(global_space)
+// local_space
+SYCL_PLOAD_DUP(local_space)
+// private_space
+//SYCL_PLOAD_DUP(private_space)
+#undef SYCL_PLOAD_DUP
+
+#define SYCL_PLOAD_DUP_SPECILIZE(packet_type)\
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
+ploaddup<packet_type>(const typename unpacket_traits<packet_type>::type * from)\
+{ \
+ return get_base_packet<packet_type>::get_ploaddup(from); \
+}
+
+SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)
+SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
+
+#undef SYCL_PLOAD_DUP_SPECILIZE
+
+#define SYCL_PLSET(packet_type)\
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>(const typename unpacket_traits<packet_type>::type& a) {\
+ return get_base_packet<packet_type>::set_plset(a);\
+}
+
+SYCL_PLSET(cl::sycl::cl_float4)
+SYCL_PLSET(cl::sycl::cl_double2)
+
+#undef SYCL_PLSET
+
+
+#define SYCL_PGATHER(address_space_target)\
+template<typename Scalar, typename packet_type> EIGEN_DEVICE_FUNC inline packet_type pgather(\
+ typename cl::sycl::multi_ptr<const typename unpacket_traits<packet_type>::type,\
+ cl::sycl::access::address_space::address_space_target>::pointer_t from, Index stride) {\
+ return get_base_packet<packet_type>::get_pgather(from, stride); \
+}
+
+// global space
+SYCL_PGATHER(global_space)
+// local space
+SYCL_PGATHER(local_space)
+// private space
+//SYCL_PGATHER(private_space)
+
+#undef SYCL_PGATHER
+
+
+#define SYCL_PGATHER_SPECILIZE(scalar, packet_type)\
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
+pgather<scalar, packet_type>(const typename unpacket_traits<packet_type>::type * from, Index stride)\
+{ \
+ return get_base_packet<packet_type>::get_pgather(from, stride); \
+}
+
+SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4)
+SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2)
+
+#undef SYCL_PGATHER_SPECILIZE
+
+#define SYCL_PSCATTER(address_space_target)\
+template<typename Scalar, typename packet_type> EIGEN_DEVICE_FUNC inline void pscatter(\
+ typename cl::sycl::multi_ptr<typename unpacket_traits<packet_type>::type,\
+ cl::sycl::access::address_space::address_space_target>::pointer_t to,\
+ const packet_type& from, Index stride) {\
+ get_base_packet<packet_type>::set_pscatter(to, from, stride);\
+}
+
+// global space
+SYCL_PSCATTER(global_space)
+// local space
+SYCL_PSCATTER(local_space)
+// private space
+//SYCL_PSCATTER(private_space)
+
+#undef SYCL_PSCATTER
+
+
+
+#define SYCL_PSCATTER_SPECILIZE(scalar, packet_type)\
+template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void \
+pscatter<scalar, packet_type>(typename unpacket_traits<packet_type>::type * to, const packet_type& from, Index stride)\
+{ \
+ get_base_packet<packet_type>::set_pscatter(to, from, stride);\
+}
+
+SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4)
+SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2)
+
+#undef SYCL_PSCATTER_SPECILIZE
+
+
+#define SYCL_PMAD(packet_type)\
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd( const packet_type& a,\
+ const packet_type& b, const packet_type& c){\
+ return cl::sycl::mad(a,b,c);\
+}
+
+SYCL_PMAD(cl::sycl::cl_float4)
+SYCL_PMAD(cl::sycl::cl_double2)
+#undef SYCL_PMAD
+
+
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
+ return a.x();
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
+ return a.x();
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
+ return a.x() + a.y() + a.z() + a.w();
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
+ return a.x() + a.y();
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
+ return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()), cl::sycl::fmax(a.z(), a.w()));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
+ return cl::sycl::fmax(a.x(), a.y());
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
+ return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()), cl::sycl::fmin(a.z(), a.w()));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
+ return cl::sycl::fmin(a.x(), a.y());
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
+ return a.x() * a.y() * a.z() * a.w();
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
+ return a.x() * a.y();
+}
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
+ return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()), cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w()));
+}
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
+ return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));
+}
+
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void
+ptranspose(PacketBlock<cl::sycl::cl_float4,4>& kernel) {
+ float tmp = kernel.packet[0].y();
+ kernel.packet[0].y() = kernel.packet[1].x();
+ kernel.packet[1].x() = tmp;
+// std::swap(kernel.packet[0].y(), kernel.packet[1].x());
+
+ tmp = kernel.packet[0].z();
+ kernel.packet[0].z() = kernel.packet[2].x();
+ kernel.packet[2].x() = tmp;
+ //std::swap(kernel.packet[0].z(), kernel.packet[2].x());
+
+ tmp = kernel.packet[0].w();
+ kernel.packet[0].w() = kernel.packet[3].x();
+ kernel.packet[3].x() = tmp;
+
+ //std::swap(kernel.packet[0].w(), kernel.packet[3].x());
+
+ tmp = kernel.packet[1].z();
+ kernel.packet[1].z() = kernel.packet[2].y();
+ kernel.packet[2].y() = tmp;
+// std::swap(kernel.packet[1].z(), kernel.packet[2].y());
+
+ tmp = kernel.packet[1].w();
+ kernel.packet[1].w() = kernel.packet[3].y();
+ kernel.packet[3].y() = tmp;
+// std::swap(kernel.packet[1].w(), kernel.packet[3].y());
+
+ tmp = kernel.packet[2].w();
+ kernel.packet[2].w() = kernel.packet[3].z();
+ kernel.packet[3].z() = tmp;
+// std::swap(kernel.packet[2].w(), kernel.packet[3].z());
+
+}
+
+ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void
+ptranspose(PacketBlock<cl::sycl::cl_double2,2>& kernel) {
+ double tmp = kernel.packet[0].y();
+ kernel.packet[0].y() = kernel.packet[1].x();
+ kernel.packet[1].x() = tmp;
+//std::swap(kernel.packet[0].y(), kernel.packet[1].x());
+}
+
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
+pblend(const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket,
+ const cl::sycl::cl_float4& thenPacket, const cl::sycl::cl_float4& elsePacket) {
+ cl::sycl::cl_int4 condition(ifPacket.select[0] ? 0 : -1,
+ ifPacket.select[1] ? 0 : -1,
+ ifPacket.select[2] ? 0 : -1,
+ ifPacket.select[3] ? 0 : -1);
+ return cl::sycl::select(thenPacket, elsePacket, condition);
+}
+
+template<> inline cl::sycl::cl_double2
+pblend(const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
+ const cl::sycl::cl_double2& thenPacket, const cl::sycl::cl_double2& elsePacket) {
+ cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1,
+ ifPacket.select[1] ? 0 : -1);
+ return cl::sycl::select(thenPacket, elsePacket, condition);
+}
+
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_USE_SYCL
+#endif // EIGEN_PACKET_MATH_SYCL_H
diff --git a/Eigen/src/Core/arch/SYCL/TypeCasting.h b/Eigen/src/Core/arch/SYCL/TypeCasting.h
new file mode 100644
index 000000000..dedd5c84a
--- /dev/null
+++ b/Eigen/src/Core/arch/SYCL/TypeCasting.h
@@ -0,0 +1,89 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Mehdi Goli Codeplay Software Ltd.
+// Ralph Potter Codeplay Software Ltd.
+// Luke Iwanski Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/*****************************************************************
+ * TypeCasting.h
+ *
+ * \brief:
+ * TypeCasting
+ *
+*****************************************************************/
+
+#ifndef EIGEN_TYPE_CASTING_SYCL_H
+#define EIGEN_TYPE_CASTING_SYCL_H
+
+namespace Eigen {
+
+namespace internal {
+#ifdef __SYCL_DEVICE_ONLY__
+template <>
+struct type_casting_traits<float, int> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_int4 pcast<cl::sycl::cl_float4, cl::sycl::cl_int4>(const cl::sycl::cl_float4& a) {
+ return a. template convert<cl::sycl::cl_int, cl::sycl::rounding_mode::automatic>();
+}
+
+
+template <>
+struct type_casting_traits<int, float> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 1
+ };
+};
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pcast<cl::sycl::cl_int4, cl::sycl::cl_float4>(const cl::sycl::cl_int4& a) {
+ return a. template convert<cl::sycl::cl_float, cl::sycl::rounding_mode::automatic>();
+}
+
+template <>
+struct type_casting_traits<double, float> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 2,
+ TgtCoeffRatio = 1
+ };
+};
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pcast<cl::sycl::cl_double2, cl::sycl::cl_float4>(const cl::sycl::cl_double2& a, const cl::sycl::cl_double2& b) {
+ auto a1=a. template convert<cl::sycl::cl_float, cl::sycl::rounding_mode::automatic>();
+ auto b1=b. template convert<cl::sycl::cl_float, cl::sycl::rounding_mode::automatic>();
+ return cl::sycl::float4(a1.x(), a1.y(), b1.x(), b1.y());
+}
+
+template <>
+struct type_casting_traits<float, double> {
+ enum {
+ VectorizedCast = 1,
+ SrcCoeffRatio = 1,
+ TgtCoeffRatio = 2
+ };
+};
+
+template<> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pcast<cl::sycl::cl_float4, cl::sycl::cl_double2>(const cl::sycl::cl_float4& a) {
+ // Simply discard the second half of the input
+ return cl::sycl::cl_double2(a.x(), a.y());
+}
+
+#endif
+} // end namespace internal
+
+} // end namespace Eigen
+
+#endif // EIGEN_TYPE_CASTING_SYCL_H