aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
diff options
context:
space:
mode:
authorGravatar Mehdi Goli <mehdi.goli@codeplay.com>2016-12-14 17:38:53 +0000
committerGravatar Mehdi Goli <mehdi.goli@codeplay.com>2016-12-14 17:38:53 +0000
commit730eb9fe1c0e0daa81aebbc4dbce52e185dda3dd (patch)
treef63b028b488991af4d1c59bdb611df7866005449 /unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
parent2d4a091beb9e55664c1475137af7166d524cbc1d (diff)
Adding asynchronous execution as it improves the performance.
Diffstat (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h10
1 files changed, 7 insertions, 3 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
index f92ea1d7b..46776d777 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
@@ -214,7 +214,7 @@ struct SyclDevice {
auto dst_acc =it2->second.template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::global_buffer>(cgh);
cgh.parallel_for(cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), MemCopyFunctor<T>(src_acc, dst_acc, rng, 0, offset));
});
- synchronize();
+ asynchronousExec();
}
/// The memcpyHostToDevice is used to copy the device only pointer to a host pointer. Using the device
@@ -245,7 +245,7 @@ struct SyclDevice {
auto dst_acc =dest_buf.template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::global_buffer>(cgh);
cgh.parallel_for( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), MemCopyFunctor<T>(src_acc, dst_acc, rng, 0, 0));
});
- synchronize();
+ asynchronousExec();
}
/// Returns a reference to the SYCL queue held by m_queue_stream (its m_queue member).
EIGEN_STRONG_INLINE cl::sycl::queue& sycl_queue() const { return m_queue_stream->m_queue;}
@@ -263,7 +263,7 @@ struct SyclDevice {
}
});
});
- synchronize();
+ asynchronousExec();
}
EIGEN_STRONG_INLINE size_t firstLevelCacheSize() const {
@@ -282,6 +282,10 @@ struct SyclDevice {
/// Calls wait_and_throw() on the queue returned by sycl_queue().
/// NOTE(review): per the SYCL queue API this is expected to block until the
/// enqueued work has finished and then report any asynchronous errors --
/// confirm against the SYCL implementation in use.
EIGEN_STRONG_INLINE void synchronize() const {
sycl_queue().wait_and_throw(); // forwards to the underlying cl::sycl::queue
}
+
+ EIGEN_STRONG_INLINE void asynchronousExec() const {
+ sycl_queue().throw_asynchronous();// NOTE(review): presumably only reports already-raised async errors and does not wait for enqueued kernels -- confirm that the call sites switched from synchronize() tolerate this
+ }
// This function checks if the runtime recorded an error for the
// underlying stream device.
EIGEN_STRONG_INLINE bool ok() const {