From 79aa2b784ecc26d6a8ef6fb2b2b053f4ad81593b Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Thu, 1 Dec 2016 13:02:27 +0000 Subject: Adding sycl backend for TensorPadding.h; disbaling __unit128 for sycl in TensorIntDiv.h; disabling cashsize for sycl in tensorDeviceDefault.h; adding sycl backend for StrideSliceOP ; removing sycl compiler warning for creating an array of size 0 in CXX11Meta.h; cleaning up the sycl backend code. --- .../Eigen/CXX11/src/Tensor/TensorDeviceSycl.h | 62 +++++++++++++--------- 1 file changed, 37 insertions(+), 25 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h index 1fd00d4f6..40dd5d81a 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h @@ -17,6 +17,32 @@ namespace Eigen { + #define ConvertToActualTypeSycl(Scalar, buf_acc) reinterpret_cast::pointer_t>((&(*buf_acc.get_pointer()))) + + template class MemCopyFunctor { + public: + typedef cl::sycl::accessor read_accessor; + typedef cl::sycl::accessor write_accessor; + + MemCopyFunctor(read_accessor src_acc, write_accessor dst_acc, size_t rng, size_t i, size_t offset): m_src_acc(src_acc), m_dst_acc(dst_acc), m_rng(rng), m_i(i), m_offset(offset) {} + + void operator()(cl::sycl::nd_item<1> itemID) { + auto src_ptr = ConvertToActualTypeSycl(Scalar, m_src_acc); + auto dst_ptr = ConvertToActualTypeSycl(Scalar, m_dst_acc); + auto globalid = itemID.get_global_linear_id(); + if (globalid < m_rng) { + dst_ptr[globalid + m_i] = src_ptr[globalid + m_offset]; + } + } + + private: + read_accessor m_src_acc; + write_accessor m_dst_acc; + size_t m_rng; + size_t m_i; + size_t m_offset; + }; + EIGEN_STRONG_INLINE auto get_sycl_supported_devices()->decltype(cl::sycl::device::get_devices()){ auto devices = cl::sycl::device::get_devices(); std::vector::iterator it =devices.begin(); @@ -33,7 +59,6 @@ EIGEN_STRONG_INLINE auto get_sycl_supported_devices()->decltype(cl::sycl::device } return devices; } -#define ConvertToActualTypeSycl(T, buf_acc) reinterpret_cast::pointer_t>((&(*buf_acc.get_pointer()))) struct QueueInterface { /// class members: @@ -170,30 +195,6 @@ struct SyclDevice { // some runtime conditions that can be applied here EIGEN_STRONG_INLINE bool isDeviceSuitable() const { return true; } - template class MemCopyFunctor { - public: - typedef cl::sycl::accessor read_accessor; - typedef cl::sycl::accessor write_accessor; - - MemCopyFunctor(read_accessor src_acc, write_accessor dst_acc, size_t rng, size_t i, size_t offset): m_src_acc(src_acc), m_dst_acc(dst_acc), m_rng(rng), m_i(i), m_offset(offset) {} - - void operator()(cl::sycl::nd_item<1> itemID) { - auto src_ptr = ConvertToActualTypeSycl(T, m_src_acc); - auto dst_ptr = ConvertToActualTypeSycl(T, m_dst_acc); - auto globalid = itemID.get_global_linear_id(); - if (globalid < m_rng) { - dst_ptr[globalid + m_i] = src_ptr[globalid + m_offset]; - } - } - - private: - read_accessor m_src_acc; - write_accessor m_dst_acc; - size_t m_rng; - size_t m_i; - size_t m_offset; - }; - /// the memcpy function template EIGEN_STRONG_INLINE void memcpy(void *dst, const T *src, size_t n) const { auto it1 = m_queue_stream->find_buffer((void*)src); @@ -260,6 +261,17 @@ struct SyclDevice { }); synchronize(); } + + EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { + // FIXME + return 48*1024; + } + + EIGEN_STRONG_INLINE size_t lastLevelCacheSize() const { + // We won't try to take advantage of the l2 cache for the time being, and + // there is no l3 cache on cuda devices. + return firstLevelCacheSize(); + } /// No need for sycl it should act the same as CPU version EIGEN_STRONG_INLINE int majorDeviceVersion() const { return 1; } -- cgit v1.2.3