From 35bae513a0094f986c810c3f839e5a954caabd4b Mon Sep 17 00:00:00 2001 From: Mehdi Goli Date: Fri, 16 Dec 2016 19:46:45 +0000 Subject: Converting all parallel for lambda to functor in order to prevent kernel duplication name error; adding tensorConcatinationOp backend for sycl. --- .../Eigen/CXX11/src/Tensor/TensorDeviceSycl.h | 60 ++++++++++++++-------- 1 file changed, 39 insertions(+), 21 deletions(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h index 96c95e294..d444f3cd8 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h @@ -43,6 +43,18 @@ namespace Eigen { size_t m_offset; }; + struct memsetkernelFunctor{ + typedef cl::sycl::accessor AccType; + AccType m_acc; + const size_t m_rng, m_c; + memsetkernelFunctor(AccType acc, const size_t rng, const size_t c):m_acc(acc), m_rng(rng), m_c(c){} + void operator()(cl::sycl::nd_item<1> itemID) { + auto globalid=itemID.get_global_linear_id(); + if (globalid< m_rng) m_acc[globalid] = m_c; + } + + }; + EIGEN_STRONG_INLINE auto get_sycl_supported_devices()->decltype(cl::sycl::device::get_devices()){ auto devices = cl::sycl::device::get_devices(); std::vector::iterator it =devices.begin(); @@ -88,15 +100,17 @@ struct QueueInterface { } } })) - #else - m_queue(cl::sycl::queue(s, [&](cl::sycl::exception_list l) { - for (const auto& e : l) { - if (e) { - exception_caught_ = true; - } - } - })) - #endif +#else +m_queue(cl::sycl::queue(s, [&](cl::sycl::exception_list l) { + for (const auto& e : l) { + if (e) { + exception_caught_ = true; + std::cerr << "Error detected Inside Sycl Device."<< std::endl; + + } + } +})) +#endif {} /// Allocating device pointer. This pointer is actually an 8 bytes host pointer used as key to access the sycl device buffer. @@ -256,22 +270,26 @@ struct SyclDevice { /// returning the sycl queue EIGEN_STRONG_INLINE cl::sycl::queue& sycl_queue() const { return m_queue_stream->m_queue;} /// Here is the implementation of memset function on sycl. - template EIGEN_STRONG_INLINE void memset(T *data, int c, size_t n) const { + EIGEN_STRONG_INLINE void memset(void *data, int c, size_t n) const { size_t rng, GRange, tileSize; - parallel_for_setup(n/sizeof(T), tileSize, rng, GRange); - sycl_queue().submit([&](cl::sycl::handler &cgh) { - auto buf_acc =get_sycl_buffer(static_cast(static_cast(data))). template get_access(cgh); - cgh.parallel_for( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) { - auto globalid=itemID.get_global_linear_id(); - if (globalid< rng) { - for(size_t i=0; i(static_cast(data))),rng, GRange, tileSize, c )); asynchronousExec(); } + struct memsetCghFunctor{ + cl::sycl::buffer& m_buf; + const size_t& rng , GRange, tileSize; + const int &c; + memsetCghFunctor(cl::sycl::buffer& buff, const size_t& rng_, const size_t& GRange_, const size_t& tileSize_, const int& c_) + :m_buf(buff), rng(rng_), GRange(GRange_), tileSize(tileSize_), c(c_){} + + void operator()(cl::sycl::handler &cgh) const { + auto buf_acc = m_buf.template get_access(cgh); + cgh.parallel_for(cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), memsetkernelFunctor(buf_acc, rng, c)); + } + }; + EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const { // FIXME return 48*1024; -- cgit v1.2.3