// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Mehdi Goli Codeplay Software Ltd. // Ralph Potter Codeplay Software Ltd. // Luke Iwanski Codeplay Software Ltd. // Contact: // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. /***************************************************************** * PacketMath.h * * \brief: * PacketMath * *****************************************************************/ #ifndef EIGEN_PACKET_MATH_SYCL_H #define EIGEN_PACKET_MATH_SYCL_H #include namespace Eigen { namespace internal { #ifdef SYCL_DEVICE_ONLY #define SYCL_PLOADT_RO(address_space_target) \ template \ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt_ro( \ typename cl::sycl::multi_ptr< \ const typename unpacket_traits::type, \ cl::sycl::access::address_space::address_space_target>::pointer_t \ from) { \ typedef typename unpacket_traits::type scalar; \ typedef cl::sycl::multi_ptr< \ scalar, cl::sycl::access::address_space::address_space_target> \ multi_ptr; \ auto res = packet_type( \ static_cast::type>(0)); \ res.load(0, multi_ptr(const_cast(from))); \ return res; \ } SYCL_PLOADT_RO(global_space) SYCL_PLOADT_RO(local_space) #undef SYCL_PLOADT_RO #endif template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt_ro(const Eigen::TensorSycl::internal::RangeAccess< cl::sycl::access::mode::read_write, T>& from) { return ploadt_ro(from.get_pointer()); } #ifdef SYCL_DEVICE_ONLY #define SYCL_PLOAD(address_space_target, Alignment, AlignedType) \ template \ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \ typename cl::sycl::multi_ptr< \ const typename unpacket_traits::type, \ cl::sycl::access::address_space::address_space_target>::pointer_t \ from) { \ return ploadt_ro(from); \ } // global space SYCL_PLOAD(global_space, Unaligned, u) SYCL_PLOAD(global_space, Aligned, ) // local space SYCL_PLOAD(local_space, Unaligned, u) SYCL_PLOAD(local_space, Aligned, ) #undef SYCL_PLOAD #endif #define SYCL_PLOAD(Alignment, AlignedType) \ template \ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##AlignedType( \ const Eigen::TensorSycl::internal::RangeAccess< \ cl::sycl::access::mode::read_write, \ typename unpacket_traits::type> \ from) { \ return ploadt_ro(from); \ } SYCL_PLOAD(Unaligned, u) SYCL_PLOAD(Aligned, ) #undef SYCL_PLOAD #ifdef SYCL_DEVICE_ONLY /** \internal \returns a packet version of \a *from. * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ #define SYCL_PLOADT(address_space_target) \ template \ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt( \ typename cl::sycl::multi_ptr< \ const typename unpacket_traits::type, \ cl::sycl::access::address_space::address_space_target>::pointer_t \ from) { \ if (Alignment >= unpacket_traits::alignment) \ return pload(from); \ else \ return ploadu(from); \ } // global space SYCL_PLOADT(global_space) // local space SYCL_PLOADT(local_space) #undef SYCL_PLOADT #endif template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type ploadt(const Eigen::TensorSycl::internal::RangeAccess< cl::sycl::access::mode::read_write, typename unpacket_traits::type>& from) { return ploadt(from.get_pointer()); } #ifdef SYCL_DEVICE_ONLY // private_space #define SYCL_PLOADT_RO_SPECIAL(packet_type, Alignment) \ template <> \ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type \ ploadt_ro( \ const typename unpacket_traits::type* from) { \ typedef typename unpacket_traits::type scalar; \ auto res = packet_type(static_cast(0)); \ res.template load( \ 0, const_cast(from)); \ return res; \ } SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Aligned) SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Aligned) SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_float4, Unaligned) SYCL_PLOADT_RO_SPECIAL(cl::sycl::cl_double2, Unaligned) #define SYCL_PLOAD_SPECIAL(packet_type, alignment_type) \ template <> \ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pload##alignment_type( \ const typename unpacket_traits::type* from) { \ typedef typename unpacket_traits::type scalar; \ auto res = packet_type(static_cast(0)); \ res.template load( \ 0, const_cast(from)); \ return res; \ } SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, ) SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, ) SYCL_PLOAD_SPECIAL(cl::sycl::cl_float4, u) SYCL_PLOAD_SPECIAL(cl::sycl::cl_double2, u) #undef SYCL_PLOAD_SPECIAL #define SYCL_PSTORE(scalar, packet_type, address_space_target, alignment) \ template <> \ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \ typename cl::sycl::multi_ptr< \ scalar, \ cl::sycl::access::address_space::address_space_target>::pointer_t \ to, \ const packet_type& from) { \ typedef cl::sycl::multi_ptr< \ scalar, cl::sycl::access::address_space::address_space_target> \ multi_ptr; \ from.store(0, multi_ptr(to)); \ } // global space SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, ) SYCL_PSTORE(float, cl::sycl::cl_float4, global_space, u) SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, ) SYCL_PSTORE(double, cl::sycl::cl_double2, global_space, u) SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, ) SYCL_PSTORE(float, cl::sycl::cl_float4, local_space, u) SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, ) SYCL_PSTORE(double, cl::sycl::cl_double2, local_space, u) SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, ) SYCL_PSTORE(float, cl::sycl::cl_float4, private_space, u) SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, ) SYCL_PSTORE(double, cl::sycl::cl_double2, private_space, u) #undef SYCL_PSTORE #define SYCL_PSTORE_T(address_space_target) \ template \ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret( \ typename cl::sycl::multi_ptr< \ scalar, \ cl::sycl::access::address_space::address_space_target>::pointer_t \ to, \ const packet_type& from) { \ if (Alignment) \ pstore(to, from); \ else \ pstoreu(to, from); \ } SYCL_PSTORE_T(global_space) SYCL_PSTORE_T(local_space) #undef SYCL_PSTORE_T #define SYCL_PSET1(packet_type) \ template <> \ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1( \ const typename unpacket_traits::type& from) { \ return packet_type(from); \ } // global space SYCL_PSET1(cl::sycl::cl_float4) SYCL_PSET1(cl::sycl::cl_double2) #undef SYCL_PSET1 template struct get_base_packet { template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_ploaddup(sycl_multi_pointer) {} template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type get_pgather(sycl_multi_pointer, Index) {} }; template <> struct get_base_packet { template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup( sycl_multi_pointer from) { return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]); } template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather( sycl_multi_pointer from, Index stride) { return cl::sycl::cl_float4(from[0 * stride], from[1 * stride], from[2 * stride], from[3 * stride]); } template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter( sycl_multi_pointer to, const cl::sycl::cl_float4& from, Index stride) { auto tmp = stride; to[0] = from.x(); to[tmp] = from.y(); to[tmp += stride] = from.z(); to[tmp += stride] = from.w(); } static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset( const float& a) { return cl::sycl::cl_float4(static_cast(a), static_cast(a + 1), static_cast(a + 2), static_cast(a + 3)); } }; template <> struct get_base_packet { template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_ploaddup(const sycl_multi_pointer from) { return cl::sycl::cl_double2(from[0], from[0]); } template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather( const sycl_multi_pointer from, Index stride) { return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]); } template static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter( sycl_multi_pointer to, const cl::sycl::cl_double2& from, Index stride) { to[0] = from.x(); to[stride] = from.y(); } static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset( const double& a) { return cl::sycl::cl_double2(static_cast(a), static_cast(a + 1)); } }; #define SYCL_PLOAD_DUP(address_space_target) \ template \ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup( \ typename cl::sycl::multi_ptr< \ const typename unpacket_traits::type, \ cl::sycl::access::address_space::address_space_target>::pointer_t \ from) { \ return get_base_packet::get_ploaddup(from); \ } // global space SYCL_PLOAD_DUP(global_space) // local_space SYCL_PLOAD_DUP(local_space) #undef SYCL_PLOAD_DUP #define SYCL_PLOAD_DUP_SPECILIZE(packet_type) \ template <> \ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup( \ const typename unpacket_traits::type* from) { \ return get_base_packet::get_ploaddup(from); \ } SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4) SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2) #undef SYCL_PLOAD_DUP_SPECILIZE #define SYCL_PLSET(packet_type) \ template <> \ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset( \ const typename unpacket_traits::type& a) { \ return get_base_packet::set_plset(a); \ } SYCL_PLSET(cl::sycl::cl_float4) SYCL_PLSET(cl::sycl::cl_double2) #undef SYCL_PLSET #define SYCL_PGATHER(address_space_target) \ template \ EIGEN_DEVICE_FUNC inline packet_type pgather( \ typename cl::sycl::multi_ptr< \ const typename unpacket_traits::type, \ cl::sycl::access::address_space::address_space_target>::pointer_t \ from, \ Index stride) { \ return get_base_packet::get_pgather(from, stride); \ } // global space SYCL_PGATHER(global_space) // local space SYCL_PGATHER(local_space) #undef SYCL_PGATHER #define SYCL_PGATHER_SPECILIZE(scalar, packet_type) \ template <> \ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \ pgather( \ const typename unpacket_traits::type* from, Index stride) { \ return get_base_packet::get_pgather(from, stride); \ } SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4) SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2) #undef SYCL_PGATHER_SPECILIZE #define SYCL_PSCATTER(address_space_target) \ template \ EIGEN_DEVICE_FUNC inline void pscatter( \ typename cl::sycl::multi_ptr< \ typename unpacket_traits::type, \ cl::sycl::access::address_space::address_space_target>::pointer_t \ to, \ const packet_type& from, Index stride) { \ get_base_packet::set_pscatter(to, from, stride); \ } // global space SYCL_PSCATTER(global_space) // local space SYCL_PSCATTER(local_space) #undef SYCL_PSCATTER #define SYCL_PSCATTER_SPECILIZE(scalar, packet_type) \ template <> \ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter( \ typename unpacket_traits::type * to, \ const packet_type& from, Index stride) { \ get_base_packet::set_pscatter(to, from, stride); \ } SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4) SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2) #undef SYCL_PSCATTER_SPECILIZE #define SYCL_PMAD(packet_type) \ template <> \ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd( \ const packet_type& a, const packet_type& b, const packet_type& c) { \ return cl::sycl::mad(a, b, c); \ } SYCL_PMAD(cl::sycl::cl_float4) SYCL_PMAD(cl::sycl::cl_double2) #undef SYCL_PMAD template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst( const cl::sycl::cl_float4& a) { return a.x(); } template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst( const cl::sycl::cl_double2& a) { return a.x(); } template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux( const cl::sycl::cl_float4& a) { return a.x() + a.y() + a.z() + a.w(); } template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux( const cl::sycl::cl_double2& a) { return a.x() + a.y(); } template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max( const cl::sycl::cl_float4& a) { return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()), cl::sycl::fmax(a.z(), a.w())); } template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max( const cl::sycl::cl_double2& a) { return cl::sycl::fmax(a.x(), a.y()); } template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min( const cl::sycl::cl_float4& a) { return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()), cl::sycl::fmin(a.z(), a.w())); } template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min( const cl::sycl::cl_double2& a) { return cl::sycl::fmin(a.x(), a.y()); } template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul( const cl::sycl::cl_float4& a) { return a.x() * a.y() * a.z() * a.w(); } template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul( const cl::sycl::cl_double2& a) { return a.x() * a.y(); } template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pabs(const cl::sycl::cl_float4& a) { return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()), cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w())); } template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2 pabs(const cl::sycl::cl_double2& a) { return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y())); } template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(const Packet &a, const Packet &b) { return ((a <= b) .template convert::type, cl::sycl::rounding_mode::automatic>()); } template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(const Packet &a, const Packet &b) { return ((a < b) .template convert::type, cl::sycl::rounding_mode::automatic>()); } template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(const Packet &a, const Packet &b) { return ((a == b) .template convert::type, cl::sycl::rounding_mode::automatic>()); } #define SYCL_PCMP(OP, TYPE) \ template <> \ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP(const TYPE &a, \ const TYPE &b) { \ return sycl_pcmp_##OP(a, b); \ } SYCL_PCMP(le, cl::sycl::cl_float4) SYCL_PCMP(lt, cl::sycl::cl_float4) SYCL_PCMP(eq, cl::sycl::cl_float4) SYCL_PCMP(le, cl::sycl::cl_double2) SYCL_PCMP(lt, cl::sycl::cl_double2) SYCL_PCMP(eq, cl::sycl::cl_double2) #undef SYCL_PCMP template struct convert_to_integer; template <> struct convert_to_integer { using type = std::int32_t; using packet_type = cl::sycl::cl_int4; }; template <> struct convert_to_integer { using type = std::int64_t; using packet_type = cl::sycl::cl_long2; }; template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename convert_to_integer< typename unpacket_traits::type>::packet_type vector_as_int(const PacketIn &p) { return ( p.template convert::type>::type, cl::sycl::rounding_mode::automatic>()); } template EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packetOut convert_vector(const PacketIn &p) { return (p.template convert::type, cl::sycl::rounding_mode::automatic>()); } #define SYCL_PAND(TYPE) \ template <> \ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pand(const TYPE &a, \ const TYPE &b) { \ return convert_vector(vector_as_int(a) & vector_as_int(b)); \ } SYCL_PAND(cl::sycl::cl_float4) SYCL_PAND(cl::sycl::cl_double2) #undef SYCL_PAND #define SYCL_POR(TYPE) \ template <> \ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE por(const TYPE &a, \ const TYPE &b) { \ return convert_vector(vector_as_int(a) | vector_as_int(b)); \ } SYCL_POR(cl::sycl::cl_float4) SYCL_POR(cl::sycl::cl_double2) #undef SYCL_POR #define SYCL_PXOR(TYPE) \ template <> \ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pxor(const TYPE &a, \ const TYPE &b) { \ return convert_vector(vector_as_int(a) ^ vector_as_int(b)); \ } SYCL_PXOR(cl::sycl::cl_float4) SYCL_PXOR(cl::sycl::cl_double2) #undef SYCL_PXOR #define SYCL_PANDNOT(TYPE) \ template <> \ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pandnot(const TYPE &a, \ const TYPE &b) { \ return convert_vector(vector_as_int(a) & (~vector_as_int(b))); \ } SYCL_PANDNOT(cl::sycl::cl_float4) SYCL_PANDNOT(cl::sycl::cl_double2) #undef SYCL_PANDNOT EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose( PacketBlock& kernel) { float tmp = kernel.packet[0].y(); kernel.packet[0].y() = kernel.packet[1].x(); kernel.packet[1].x() = tmp; tmp = kernel.packet[0].z(); kernel.packet[0].z() = kernel.packet[2].x(); kernel.packet[2].x() = tmp; tmp = kernel.packet[0].w(); kernel.packet[0].w() = kernel.packet[3].x(); kernel.packet[3].x() = tmp; tmp = kernel.packet[1].z(); kernel.packet[1].z() = kernel.packet[2].y(); kernel.packet[2].y() = tmp; tmp = kernel.packet[1].w(); kernel.packet[1].w() = kernel.packet[3].y(); kernel.packet[3].y() = tmp; tmp = kernel.packet[2].w(); kernel.packet[2].w() = kernel.packet[3].z(); kernel.packet[3].z() = tmp; } EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose( PacketBlock& kernel) { double tmp = kernel.packet[0].y(); kernel.packet[0].y() = kernel.packet[1].x(); kernel.packet[1].x() = tmp; } template <> EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend( const Selector::size>& ifPacket, const cl::sycl::cl_float4& thenPacket, const cl::sycl::cl_float4& elsePacket) { cl::sycl::cl_int4 condition( ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1, ifPacket.select[2] ? 0 : -1, ifPacket.select[3] ? 0 : -1); return cl::sycl::select(thenPacket, elsePacket, condition); } template <> inline cl::sycl::cl_double2 pblend( const Selector::size>& ifPacket, const cl::sycl::cl_double2& thenPacket, const cl::sycl::cl_double2& elsePacket) { cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1); return cl::sycl::select(thenPacket, elsePacket, condition); } #endif // SYCL_DEVICE_ONLY #define SYCL_PSTORE(alignment) \ template \ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \ const Eigen::TensorSycl::internal::RangeAccess< \ cl::sycl::access::mode::read_write, \ typename unpacket_traits::type>& to, \ const packet_type& from) { \ pstore##alignment(to.get_pointer(), from); \ } // global space SYCL_PSTORE() SYCL_PSTORE(u) #undef SYCL_PSTORE template EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret( Eigen::TensorSycl::internal::RangeAccess< cl::sycl::access::mode::read_write, typename unpacket_traits::type> to, const packet_type& from) { pstoret(to.get_pointer(), from); } } // end namespace internal } // end namespace Eigen #endif // EIGEN_PACKET_MATH_SYCL_H