From d0ae052da4ce25a5b4306bfbb5bf8edcd010b663 Mon Sep 17 00:00:00 2001 From: mehdi-goli Date: Tue, 7 Jan 2020 15:13:37 +0000 Subject: [SYCL Backend] * Adding Missing operations for vector comparison in SYCL. This caused compiler error for vector comparison when compiling SYCL * Fixing the compiler error for placement new in TensorForcedEval.h This caused compiler error when compiling SYCL backend * Reducing the SYCL warning by removing the abort function inside the kernel * Adding Strong inline to functions inside SYCL interop. --- Eigen/src/Core/arch/SYCL/InteropHeaders.h | 32 +++--- Eigen/src/Core/arch/SYCL/PacketMath.h | 109 +++++++++++++++++++++ .../Eigen/CXX11/src/Tensor/TensorForcedEval.h | 27 ++++- 3 files changed, 149 insertions(+), 19 deletions(-) diff --git a/Eigen/src/Core/arch/SYCL/InteropHeaders.h b/Eigen/src/Core/arch/SYCL/InteropHeaders.h index 5cef1a49f..44042f441 100644 --- a/Eigen/src/Core/arch/SYCL/InteropHeaders.h +++ b/Eigen/src/Core/arch/SYCL/InteropHeaders.h @@ -147,7 +147,7 @@ struct PacketWrapper { typedef typename ::Eigen::internal::unpacket_traits::type Scalar; template - EIGEN_DEVICE_FUNC static Scalar scalarize(Index index, PacketReturnType &in) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Scalar scalarize(Index index, PacketReturnType &in) { switch (index) { case 0: return in.x(); @@ -158,17 +158,18 @@ struct PacketWrapper { case 3: return in.w(); default: - eigen_assert(false && "INDEX MUST BE BETWEEN 0 and 3"); - abort(); + //INDEX MUST BE BETWEEN 0 and 3.There is no abort function in SYCL kernel. so we cannot use abort here. + // The code will never reach here + __builtin_unreachable(); } __builtin_unreachable(); - } - EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type( + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static PacketReturnType convert_to_packet_type( Scalar in, Scalar other) { return PacketReturnType(in, other, other, other); } - EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType &lhs, Scalar *rhs) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void set_packet(PacketReturnType &lhs, Scalar *rhs) { lhs = PacketReturnType(rhs[0], rhs[1], rhs[2], rhs[3]); } }; @@ -178,14 +179,14 @@ struct PacketWrapper { typedef typename ::Eigen::internal::unpacket_traits::type Scalar; template - EIGEN_DEVICE_FUNC static Scalar scalarize(Index, PacketReturnType &in) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Scalar scalarize(Index, PacketReturnType &in) { return in; } - EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type(Scalar in, + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static PacketReturnType convert_to_packet_type(Scalar in, Scalar) { return PacketReturnType(in); } - EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType &lhs, Scalar *rhs) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void set_packet(PacketReturnType &lhs, Scalar *rhs) { lhs = rhs[0]; } }; @@ -195,24 +196,25 @@ struct PacketWrapper { typedef typename ::Eigen::internal::unpacket_traits::type Scalar; template - EIGEN_DEVICE_FUNC static Scalar scalarize(Index index, PacketReturnType &in) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Scalar scalarize(Index index, PacketReturnType &in) { switch (index) { case 0: return in.x(); case 1: return in.y(); default: - eigen_assert(false && "INDEX MUST BE BETWEEN 0 and 1"); - abort(); + //INDEX MUST BE BETWEEN 0 and 1.There is no abort function in SYCL kernel. so we cannot use abort here. + // The code will never reach here + __builtin_unreachable(); } __builtin_unreachable(); - } - EIGEN_DEVICE_FUNC static PacketReturnType convert_to_packet_type( + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static PacketReturnType convert_to_packet_type( Scalar in, Scalar other) { return PacketReturnType(in, other); } - EIGEN_DEVICE_FUNC static void set_packet(PacketReturnType &lhs, Scalar *rhs) { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void set_packet(PacketReturnType &lhs, Scalar *rhs) { lhs = PacketReturnType(rhs[0], rhs[1]); } }; diff --git a/Eigen/src/Core/arch/SYCL/PacketMath.h b/Eigen/src/Core/arch/SYCL/PacketMath.h index a9adb64ba..b11b5af9d 100644 --- a/Eigen/src/Core/arch/SYCL/PacketMath.h +++ b/Eigen/src/Core/arch/SYCL/PacketMath.h @@ -472,6 +472,115 @@ pabs(const cl::sycl::cl_double2& a) { return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y())); } +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(const Packet &a, + const Packet &b) { + return ((a <= b) + .template convert::type, + cl::sycl::rounding_mode::automatic>()); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(const Packet &a, + const Packet &b) { + return ((a < b) + .template convert::type, + cl::sycl::rounding_mode::automatic>()); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(const Packet &a, + const Packet &b) { + return ((a == b) + .template convert::type, + cl::sycl::rounding_mode::automatic>()); +} + +#define SYCL_PCMP(OP, TYPE) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP(const TYPE &a, \ + const TYPE &b) { \ + return sycl_pcmp_##OP(a, b); \ + } + +SYCL_PCMP(le, cl::sycl::cl_float4) +SYCL_PCMP(lt, cl::sycl::cl_float4) +SYCL_PCMP(eq, cl::sycl::cl_float4) +SYCL_PCMP(le, cl::sycl::cl_double2) +SYCL_PCMP(lt, cl::sycl::cl_double2) +SYCL_PCMP(eq, cl::sycl::cl_double2) +#undef SYCL_PCMP + +template struct convert_to_integer; + +template <> struct convert_to_integer { + using type = int; + using packet_type = cl::sycl::cl_int4; +}; +template <> struct convert_to_integer { + using type = long; + using packet_type = cl::sycl::cl_long2; +}; + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename convert_to_integer< + typename unpacket_traits::type>::packet_type +vector_as_int(const PacketIn &p) { + return ( + p.template convert::type>::type, + cl::sycl::rounding_mode::automatic>()); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packetOut +convert_vector(const PacketIn &p) { + return (p.template convert::type, + cl::sycl::rounding_mode::automatic>()); +} + +#define SYCL_PAND(TYPE) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pand(const TYPE &a, \ + const TYPE &b) { \ + return convert_vector(vector_as_int(a) & vector_as_int(b)); \ + } +SYCL_PAND(cl::sycl::cl_float4) +SYCL_PAND(cl::sycl::cl_double2) +#undef SYCL_PAND + +#define SYCL_POR(TYPE) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE por(const TYPE &a, \ + const TYPE &b) { \ + return convert_vector(vector_as_int(a) | vector_as_int(b)); \ + } + +SYCL_POR(cl::sycl::cl_float4) +SYCL_POR(cl::sycl::cl_double2) +#undef SYCL_POR + +#define SYCL_PXOR(TYPE) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pxor(const TYPE &a, \ + const TYPE &b) { \ + return convert_vector(vector_as_int(a) ^ vector_as_int(b)); \ + } + +SYCL_PXOR(cl::sycl::cl_float4) +SYCL_PXOR(cl::sycl::cl_double2) +#undef SYCL_PXOR + +#define SYCL_PANDNOT(TYPE) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TYPE pandnot(const TYPE &a, \ + const TYPE &b) { \ + return convert_vector(vector_as_int(a) & (~vector_as_int(b))); \ + } +SYCL_PANDNOT(cl::sycl::cl_float4) +SYCL_PANDNOT(cl::sycl::cl_double2) +#undef SYCL_PANDNOT + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void ptranspose( PacketBlock& kernel) { float tmp = kernel.packet[0].y(); diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h index 7ba32f13e..14020aa68 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorForcedEval.h @@ -77,6 +77,28 @@ class TensorForcedEvalOp : public TensorBase, ReadOn typename XprType::Nested m_xpr; }; +namespace internal { +template +struct non_integral_type_placement_new{ + template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index numValues, StorageType m_buffer) { + // Initialize non-trivially constructible types. + if (!internal::is_arithmetic::value) { + for (Index i = 0; i < numValues; ++i) new (m_buffer + i) CoeffReturnType(); + } +} +}; + +// SYCL does not support non-integral types +// having new (m_buffer + i) CoeffReturnType() causes the following compiler error for SYCL Devices +// no matching function for call to 'operator new' +template +struct non_integral_type_placement_new { + template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void operator()(Index, StorageType) { +} +}; +} // end namespace internal template struct TensorEvaluator, Device> @@ -127,10 +149,7 @@ struct TensorEvaluator, Device> const Index numValues = internal::array_prod(m_impl.dimensions()); m_buffer = m_device.get((CoeffReturnType*)m_device.allocate_temp(numValues * sizeof(CoeffReturnType))); - // Initialize non-trivially constructible types. - if (!internal::is_arithmetic::value) { - for (Index i = 0; i < numValues; ++i) new (m_buffer + i) CoeffReturnType(); - } + internal::non_integral_type_placement_new()(numValues, m_buffer); typedef TensorEvalToOp< const typename internal::remove_const::type > EvalTo; EvalTo evalToTmp(m_device.get(m_buffer), m_op); -- cgit v1.2.3