diff options
author | Mehdi Goli <mehdi.goli@codeplay.com> | 2016-11-10 19:16:31 +0000 |
---|---|---|
committer | Mehdi Goli <mehdi.goli@codeplay.com> | 2016-11-10 19:16:31 +0000 |
commit | 3be3963021ca0b1725bda2251e641c8561d707f7 (patch) | |
tree | 80bedd4e032b1b2d3db0cacc2948a76edce84cf9 /unsupported | |
parent | 12387abad5ae90a4e17c32d80da2548c3a93e87c (diff) |
Adding EIGEN_STRONG_INLINE back; using size() instead of dimensions.TotalSize() on Tensor.
Diffstat (limited to 'unsupported')
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h | 20 | ||||
-rw-r--r-- | unsupported/test/cxx11_tensor_device_sycl.cpp | 10 |
2 files changed, 15 insertions, 15 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h index 2be1a5ad6..844cec199 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h @@ -44,14 +44,14 @@ struct SyclDevice { // destructor ~SyclDevice() { deallocate_all(); } - template <typename T> void deallocate(T *p) const { + template <typename T> EIGEN_STRONG_INLINE void deallocate(T *p) const { auto it = buffer_map.find(p); if (it != buffer_map.end()) { buffer_map.erase(it); internal::aligned_free(p); } } - void deallocate_all() const { + EIGEN_STRONG_INLINE void deallocate_all() const { std::map<const void *, std::shared_ptr<void>>::iterator it=buffer_map.begin(); while (it!=buffer_map.end()) { auto p=it->first; @@ -88,23 +88,23 @@ struct SyclDevice { } /// allocating memory on the cpu - void *allocate(size_t) const { + EIGEN_STRONG_INLINE void *allocate(size_t) const { return internal::aligned_malloc(8); } // some runtime conditions that can be applied here - bool isDeviceSuitable() const { return true; } + EIGEN_STRONG_INLINE bool isDeviceSuitable() const { return true; } - void memcpy(void *dst, const void *src, size_t n) const { + EIGEN_STRONG_INLINE void memcpy(void *dst, const void *src, size_t n) const { ::memcpy(dst, src, n); } - template<typename T> void memcpyHostToDevice(T *dst, const T *src, size_t n) const { + template<typename T> EIGEN_STRONG_INLINE void memcpyHostToDevice(T *dst, const T *src, size_t n) const { auto host_acc= (static_cast<cl::sycl::buffer<T, 1>*>(add_sycl_buffer(dst, n).first->second.get()))-> template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::host_buffer>(); memcpy(host_acc.get_pointer(), src, n); } - inline void parallel_for_setup(size_t n, size_t &tileSize, size_t &rng, size_t &GRange) const { + EIGEN_STRONG_INLINE void parallel_for_setup(size_t n, size_t &tileSize, size_t &rng, size_t &GRange) const 
{ tileSize =m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2; rng = n; if (rng==0) rng=1; @@ -116,7 +116,7 @@ struct SyclDevice { } } - template<typename T> void memcpyDeviceToHost(T *dst, const T *src, size_t n) const { + template<typename T> EIGEN_STRONG_INLINE void memcpyDeviceToHost(T *dst, const T *src, size_t n) const { auto it = buffer_map.find(src); if (it != buffer_map.end()) { size_t rng, GRange, tileSize; @@ -141,7 +141,7 @@ struct SyclDevice { } } - template<typename T> void memset(T *buff, int c, size_t n) const { + template<typename T> EIGEN_STRONG_INLINE void memset(T *buff, int c, size_t n) const { size_t rng, GRange, tileSize; parallel_for_setup(n/sizeof(T), tileSize, rng, GRange); @@ -158,7 +158,7 @@ struct SyclDevice { }); m_queue.throw_asynchronous(); } - int majorDeviceVersion() const { + EIGEN_STRONG_INLINE int majorDeviceVersion() const { return 1; } }; diff --git a/unsupported/test/cxx11_tensor_device_sycl.cpp b/unsupported/test/cxx11_tensor_device_sycl.cpp index 820bc88d0..584fa8026 100644 --- a/unsupported/test/cxx11_tensor_device_sycl.cpp +++ b/unsupported/test/cxx11_tensor_device_sycl.cpp @@ -29,11 +29,11 @@ void test_device_sycl(const Eigen::SyclDevice &sycl_device) { array<int, 1> tensorRange = {{sizeDim1}}; Tensor<int, 1> in(tensorRange); Tensor<int, 1> in1(tensorRange); - memset(in1.data(), 1,in1.dimensions().TotalSize()*sizeof(int)); - int * gpu_in_data = static_cast<int*>(sycl_device.allocate(in.dimensions().TotalSize()*sizeof(int))); - sycl_device.memset(gpu_in_data, 1,in.dimensions().TotalSize()*sizeof(int) ); - sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.dimensions().TotalSize()*sizeof(int) ); - for (int i=0; i<in.dimensions().TotalSize(); i++) + memset(in1.data(), 1,in1.size()*sizeof(int)); + int * gpu_in_data = static_cast<int*>(sycl_device.allocate(in.size()*sizeof(int))); + sycl_device.memset(gpu_in_data, 1,in.size()*sizeof(int) ); + sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.size()*sizeof(int) ); + for (int i=0; i<in.size(); i++) VERIFY_IS_APPROX(in(i), in1(i)); sycl_device.deallocate(gpu_in_data); }