diff options
author | Mehdi Goli <mehdi.goli@codeplay.com> | 2017-01-19 17:06:21 +0000 |
---|---|---|
committer | Mehdi Goli <mehdi.goli@codeplay.com> | 2017-01-19 17:06:21 +0000 |
commit | 77cc4d06c746e7be2966bd0d09b55c2393e289d8 (patch) | |
tree | 009578cd2d88c4d211adaf1262d9e4b2d62161a1 | |
parent | 837fdbdcb27825204d00a1da954612b52fb851b6 (diff) |
Removing unused variables
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h | 8 | ||||
-rw-r--r-- | unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h | 2 |
2 files changed, 5 insertions, 5 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h
index 7774342d8..e2569e1bf 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorConvolutionSycl.h
@@ -326,7 +326,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
     // extract input functor list
     InputFunctorExpr input_functors = Eigen::TensorSycl::internal::extractFunctors(m_inputImpl);
-    const unsigned long maxSharedMem = m_device.sharedMemPerBlock(); // sycl localmemory size
+
     m_device.sycl_queue().submit([&](cl::sycl::handler &cgh) {
       typedef cl::sycl::accessor<CoeffReturnType, 1, cl::sycl::access::mode::read_write, cl::sycl::access::target::local> InputLocalAcc;
@@ -348,7 +348,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
           size_t range_x, GRange_x, tileSize_x, range_y, GRange_y, tileSize_y;
           m_device.parallel_for_setup(numX, numP, tileSize_x,tileSize_y,range_x,range_y, GRange_x, GRange_y );
           const size_t shared_mem =(tileSize_x +kernel_size -1)*(tileSize_y);
-          assert(static_cast<unsigned long>(shared_mem) <= maxSharedMem);
+          assert(static_cast<unsigned long>(shared_mem) <= m_device.sharedMemPerBlock());
           auto global_range=cl::sycl::range<2>(GRange_x, GRange_y); // global range
           auto local_range=cl::sycl::range<2>(tileSize_x, tileSize_y); // local range
           InputLocalAcc local_acc(cl::sycl::range<1>(shared_mem), cgh);
@@ -373,7 +373,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
           size_t range_x, GRange_x, tileSize_x, range_y, GRange_y, tileSize_y, range_z, GRange_z, tileSize_z;
           m_device.parallel_for_setup(numX, numY, numP, tileSize_x, tileSize_y, tileSize_z, range_x, range_y, range_z, GRange_x, GRange_y, GRange_z );
           const size_t shared_mem =(tileSize_x +kernel_size_x -1)*(tileSize_y +kernel_size_y -1) * tileSize_z;
-          assert(static_cast<unsigned long>(shared_mem) <= maxSharedMem);
+          assert(static_cast<unsigned long>(shared_mem) <= m_device.sharedMemPerBlock());
           auto global_range=cl::sycl::range<3>(GRange_x, GRange_y, GRange_z); // global range
           auto local_range=cl::sycl::range<3>(tileSize_x, tileSize_y, tileSize_z); // local range
           InputLocalAcc local_acc(cl::sycl::range<1>(shared_mem), cgh);
@@ -404,7 +404,7 @@ struct TensorEvaluator<const TensorConvolutionOp<Indices, InputArgType, KernelAr
           size_t range_x, GRange_x, tileSize_x, range_y, GRange_y, tileSize_y, range_z, GRange_z, tileSize_z;
           m_device.parallel_for_setup(numX, numY, numZ, tileSize_x, tileSize_y, tileSize_z, range_x, range_y, range_z, GRange_x, GRange_y, GRange_z );
           const size_t shared_mem =(tileSize_x +kernel_size_x -1)*(tileSize_y +kernel_size_y -1) * (tileSize_z +kernel_size_y -1);
-          assert(static_cast<unsigned long>(shared_mem) <= maxSharedMem);
+          assert(static_cast<unsigned long>(shared_mem) <= m_device.sharedMemPerBlock());
           auto global_range=cl::sycl::range<3>(GRange_x, GRange_y, GRange_z); // global range
           auto local_range=cl::sycl::range<3>(tileSize_x, tileSize_y, tileSize_z); // local range
           InputLocalAcc local_acc(cl::sycl::range<1>(shared_mem), cgh);
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
index ae8a9f667..a30090714 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
@@ -386,7 +386,7 @@ struct SyclDevice {
     return 2;//sycl_queue().get_device(). template get_info<cl::sycl::info::device::max_work_group_size>();
     // return stream_->deviceProperties().maxThreadsPerMultiProcessor;
   }
-  EIGEN_STRONG_INLINE int sharedMemPerBlock() const {
+  EIGEN_STRONG_INLINE size_t sharedMemPerBlock() const {
     return sycl_queue().get_device(). template get_info<cl::sycl::info::device::local_mem_size>();
     // return stream_->deviceProperties().sharedMemPerBlock;
   }