aboutsummaryrefslogtreecommitdiffhomepage
path: root/unsupported
diff options
context:
space:
mode:
authorGravatar Mehdi Goli <mehdi.goli@codeplay.com>2016-11-23 16:30:41 +0000
committerGravatar Mehdi Goli <mehdi.goli@codeplay.com>2016-11-23 16:30:41 +0000
commitb8cc5635d581d3b3ea9950ce8359681ae01491a2 (patch)
treefe2af800157e6ffbb02d593e0ebb860bff0cba16 /unsupported
parentf11da1d83b64f66252dcce17447c63bda2c663b7 (diff)
Removing unsupported device from test case; cleaning the tensor device sycl.
Diffstat (limited to 'unsupported')
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h57
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h7
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h6
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h8
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h4
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h6
-rw-r--r--unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h9
-rw-r--r--unsupported/test/cxx11_tensor_broadcast_sycl.cpp10
-rw-r--r--unsupported/test/cxx11_tensor_builtins_sycl.cpp16
-rw-r--r--unsupported/test/cxx11_tensor_device_sycl.cpp6
-rw-r--r--unsupported/test/cxx11_tensor_forced_eval_sycl.cpp15
-rw-r--r--unsupported/test/cxx11_tensor_morphing_sycl.cpp18
-rw-r--r--unsupported/test/cxx11_tensor_reduction_sycl.cpp6
-rw-r--r--unsupported/test/cxx11_tensor_sycl.cpp14
14 files changed, 89 insertions, 93 deletions
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
index 05459f1d2..c1a27b5d6 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceSycl.h
@@ -31,7 +31,7 @@ struct QueueInterface {
mutable std::map<const uint8_t *, cl::sycl::buffer<uint8_t, 1>> buffer_map;
/// sycl queue
mutable cl::sycl::queue m_queue;
- /// creating device by using selector
+ /// creating device by using cl::sycl::selector or cl::sycl::device both are the same and can be captured throufh dev_Selector typename
/// SyclStreamDevice is not owned. it is the caller's responsibility to destroy it.
template<typename dev_Selector> explicit QueueInterface(dev_Selector s):
#ifdef EIGEN_EXCEPTIONS
@@ -52,28 +52,6 @@ struct QueueInterface {
#endif
{}
- /// creating device by using selector
- /// SyclStreamDevice is not owned. it is the caller's responsibility to destroy it.
- explicit QueueInterface(cl::sycl::device d):
-#ifdef EIGEN_EXCEPTIONS
- m_queue(cl::sycl::queue(d, [&](cl::sycl::exception_list l) {
- for (const auto& e : l) {
- try {
- if (e) {
- exception_caught_ = true;
- std::rethrow_exception(e);
- }
- } catch (cl::sycl::exception e) {
- std::cerr << e.what() << std::endl;
- }
- }
- }))
-#else
- m_queue(cl::sycl::queue(d))
-#endif
- {}
-
-
/// Allocating device pointer. This pointer is actually an 8 bytes host pointer used as key to access the sycl device buffer.
/// The reason is that we cannot use device buffer as a pointer as a m_data in Eigen leafNode expressions. So we create a key
/// pointer to be used in Eigen expression construction. When we convert the Eigen construction into the sycl construction we
@@ -162,27 +140,28 @@ struct SyclDevice {
/// the buffer in the buffer_map. If found it gets the accessor from it, if not,
/// the function then adds an entry by creating a sycl buffer for that particular pointer.
template <cl::sycl::access::mode AcMd> EIGEN_STRONG_INLINE cl::sycl::accessor<uint8_t, 1, AcMd, cl::sycl::access::target::global_buffer>
- get_sycl_accessor(size_t num_bytes, cl::sycl::handler &cgh, const void* ptr) const {
- return (get_sycl_buffer(num_bytes, ptr).template get_access<AcMd, cl::sycl::access::target::global_buffer>(cgh));
+ get_sycl_accessor(cl::sycl::handler &cgh, const void* ptr) const {
+ return (get_sycl_buffer(ptr).template get_access<AcMd, cl::sycl::access::target::global_buffer>(cgh));
}
/// Accessing the created sycl device buffer for the device pointer
- EIGEN_STRONG_INLINE cl::sycl::buffer<uint8_t, 1>& get_sycl_buffer(size_t , const void * ptr) const {
+ EIGEN_STRONG_INLINE cl::sycl::buffer<uint8_t, 1>& get_sycl_buffer(const void * ptr) const {
return m_queue_stream->find_buffer(ptr)->second;
}
/// This is used to prepare the number of threads and also the number of threads per block for sycl kernels
- EIGEN_STRONG_INLINE void parallel_for_setup(size_t n, size_t &tileSize, size_t &rng, size_t &GRange) const {
- tileSize =sycl_queue().get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2;
- rng = n;
- if (rng==0) rng=1;
- GRange=rng;
- if (tileSize>GRange) tileSize=GRange;
- else if(GRange>tileSize){
- size_t xMode = GRange % tileSize;
- if (xMode != 0) GRange += (tileSize - xMode);
- }
+ template<typename T>
+ EIGEN_STRONG_INLINE void parallel_for_setup(T n, T &tileSize, T &rng, T &GRange) const {
+ tileSize =static_cast<T>(sycl_queue().get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2);
+ rng = n;
+ if (rng==0) rng=static_cast<T>(1);
+ GRange=rng;
+ if (tileSize>GRange) tileSize=GRange;
+ else if(GRange>tileSize){
+ T xMode = static_cast<T>(GRange % tileSize);
+ if (xMode != 0) GRange += static_cast<T>(tileSize - xMode);
}
+ }
/// allocate device memory
EIGEN_STRONG_INLINE void *allocate(size_t num_bytes) const {
return m_queue_stream->allocate(num_bytes);
@@ -220,7 +199,7 @@ struct SyclDevice {
/// buffer to host. Then we use the memcpy to copy the data to the host accessor. The first time that
/// this buffer is accessed, the data will be copied to the device.
template<typename T> EIGEN_STRONG_INLINE void memcpyHostToDevice(T *dst, const T *src, size_t n) const {
- auto host_acc= get_sycl_buffer(n, dst). template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::host_buffer>();
+ auto host_acc= get_sycl_buffer(dst). template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::host_buffer>();
::memcpy(host_acc.get_pointer(), src, n);
}
/// The memcpyDeviceToHost is used to copy the data from host to device. Here, in order to avoid double copying the data. We create a sycl
@@ -251,10 +230,10 @@ struct SyclDevice {
size_t rng, GRange, tileSize;
parallel_for_setup(n/sizeof(T), tileSize, rng, GRange);
sycl_queue().submit([&](cl::sycl::handler &cgh) {
- auto buf_acc =get_sycl_buffer(n, static_cast<uint8_t*>(static_cast<void*>(buff))). template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::global_buffer>(cgh);
+ auto buf_acc =get_sycl_buffer(static_cast<uint8_t*>(static_cast<void*>(buff))). template get_access<cl::sycl::access::mode::discard_write, cl::sycl::access::target::global_buffer>(cgh);
cgh.parallel_for<SyclDevice>( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) {
auto globalid=itemID.get_global_linear_id();
- if (globalid< buf_acc.get_size()) {
+ if (globalid< n) {
for(size_t i=0; i<sizeof(T); i++)
buf_acc[globalid*sizeof(T) + i] = c;
}
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h
index f293869ee..2f7468d56 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorReductionSycl.h
@@ -135,8 +135,7 @@ struct FullReducer<Self, Op, const Eigen::SyclDevice, Vectorizable> {
/// if the shared memory is less than the GRange, we set shared_mem size to the TotalSize and in this case one kernel would be created for recursion to reduce all to one.
if (GRange < outTileSize) outTileSize=GRange;
// getting final out buffer at the moment the created buffer is true because there is no need for assign
-// auto out_buffer =dev.template get_sycl_buffer<typename Eigen::internal::remove_all<CoeffReturnType>::type>(self.dimensions().TotalSize(), output);
- auto out_buffer =dev.get_sycl_buffer(self.dimensions().TotalSize(), output);
+ auto out_buffer =dev.get_sycl_buffer(output);
/// creating the shared memory for calculating reduction.
/// This one is used to collect all the reduced value of shared memory as we dont have global barrier on GPU. Once it is saved we can
@@ -191,7 +190,7 @@ struct InnerReducer<Self, Op, const Eigen::SyclDevice> {
typedef const typename Self::ChildType HostExpr; /// this is the child of reduction
typedef typename TensorSycl::internal::createPlaceHolderExpression<HostExpr>::Type PlaceHolderExpr;
auto functors = TensorSycl::internal::extractFunctors(self.impl());
- size_t range, GRange, tileSize;
+ typename Self::Index range, GRange, tileSize;
dev.parallel_for_setup(num_coeffs_to_preserve, tileSize, range, GRange);
// getting final out buffer at the moment the created buffer is true because there is no need for assign
/// creating the shared memory for calculating reduction.
@@ -204,7 +203,7 @@ struct InnerReducer<Self, Op, const Eigen::SyclDevice> {
dev.sycl_queue().submit([&](cl::sycl::handler &cgh) {
// create a tuple of accessors from Evaluator
auto tuple_of_accessors = TensorSycl::internal::createTupleOfAccessors(cgh, self.impl());
- auto output_accessor = dev.template get_sycl_accessor<cl::sycl::access::mode::discard_write>(num_coeffs_to_preserve,cgh, output);
+ auto output_accessor = dev.template get_sycl_accessor<cl::sycl::access::mode::discard_write>(cgh, output);
cgh.parallel_for<Self>( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) {
typedef typename TensorSycl::internal::ConvertToDeviceExpression<const HostExpr>::Type DevExpr;
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h
index bb847afad..0336c9866 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclConvertToDeviceExpression.h
@@ -48,9 +48,9 @@ struct DeviceConvertor{
/// specialisation of the \ref ConvertToDeviceExpression struct when the node
/// type is TensorMap
#define TENSORMAPCONVERT(CVQual)\
-template <typename T, int Options2_, template <class> class MakePointer_>\
-struct ConvertToDeviceExpression<CVQual TensorMap<T, Options2_, MakePointer_> > {\
- typedef CVQual TensorMap<T, Options2_, MakeGlobalPointer> Type;\
+template <typename T, int Options_, template <class> class MakePointer_>\
+struct ConvertToDeviceExpression<CVQual TensorMap<T, Options_, MakePointer_> > {\
+ typedef CVQual TensorMap<T, Options_, MakeGlobalPointer> Type;\
};
TENSORMAPCONVERT(const)
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h
index d7551d94f..3d3142996 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExprConstructor.h
@@ -46,11 +46,11 @@ struct ExprConstructor;
/// specialisation of the \ref ExprConstructor struct when the node type is
/// TensorMap
#define TENSORMAP(CVQual)\
-template <typename T, int Options2_, int Options3_,\
+template <typename T, int Options_,\
template <class> class MakePointer_, size_t N, typename... Params>\
-struct ExprConstructor< CVQual TensorMap<T, Options2_, MakeGlobalPointer>,\
-CVQual PlaceHolder<CVQual TensorMap<T, Options3_, MakePointer_>, N>, Params...>{\
- typedef CVQual TensorMap<T, Options2_, MakeGlobalPointer> Type;\
+struct ExprConstructor< CVQual TensorMap<T, Options_, MakeGlobalPointer>,\
+CVQual PlaceHolder<CVQual TensorMap<T, Options_, MakePointer_>, N>, Params...>{\
+ typedef CVQual TensorMap<T, Options_, MakeGlobalPointer> Type;\
Type expr;\
template <typename FuncDetector>\
ExprConstructor(FuncDetector &fd, const utility::tuple::Tuple<Params...> &t)\
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h
index 94a1452ec..06e2d5ae0 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclExtractAccessor.h
@@ -57,8 +57,8 @@ struct AccessorConstructor{
return utility::tuple::append(ExtractAccessor<Arg1>::getTuple(cgh, eval1),utility::tuple::append(ExtractAccessor<Arg2>::getTuple(cgh, eval2), ExtractAccessor<Arg3>::getTuple(cgh, eval3)));
}
template< cl::sycl::access::mode AcM, typename Arg> static inline auto getAccessor(cl::sycl::handler& cgh, Arg eval)
- -> decltype(utility::tuple::make_tuple( eval.device().template get_sycl_accessor<AcM>(eval.dimensions().TotalSize(), cgh,eval.data()))){
- return utility::tuple::make_tuple(eval.device().template get_sycl_accessor<AcM>(eval.dimensions().TotalSize(), cgh,eval.data()));
+ -> decltype(utility::tuple::make_tuple( eval.device().template get_sycl_accessor<AcM>(cgh,eval.data()))){
+ return utility::tuple::make_tuple(eval.device().template get_sycl_accessor<AcM>(cgh,eval.data()));
}
};
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h
index 0340b777f..5a2df7807 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclPlaceHolderExpr.h
@@ -122,9 +122,9 @@ ASSIGNEXPR()
/// specialisation of the \ref PlaceHolderExpression when the node is
/// TensorMap
#define TENSORMAPEXPR(CVQual)\
-template <typename T, int Options2_, template <class> class MakePointer_, size_t N>\
-struct PlaceHolderExpression< CVQual TensorMap< T, Options2_, MakePointer_>, N> {\
- typedef CVQual PlaceHolder<CVQual TensorMap<T, Options2_, MakePointer_>, N> Type;\
+template <typename T, int Options_, template <class> class MakePointer_, size_t N>\
+struct PlaceHolderExpression< CVQual TensorMap< T, Options_, MakePointer_>, N> {\
+ typedef CVQual PlaceHolder<CVQual TensorMap<T, Options_, MakePointer_>, N> Type;\
};
TENSORMAPEXPR(const)
diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h
index 5742592de..306250f7c 100644
--- a/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h
+++ b/unsupported/Eigen/CXX11/src/Tensor/TensorSyclRun.h
@@ -40,16 +40,17 @@ void run(Expr &expr, Dev &dev) {
dev.sycl_queue().submit([&](cl::sycl::handler &cgh) {
// create a tuple of accessors from Evaluator
auto tuple_of_accessors = internal::createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator);
- size_t range, GRange, tileSize;
- dev.parallel_for_setup(utility::tuple::get<0>(tuple_of_accessors).get_range()[0]/sizeof(typename Expr::Scalar), tileSize, range, GRange);
+ typename Expr::Index range, GRange, tileSize;
+ dev.parallel_for_setup(static_cast<typename Expr::Index>(evaluator.dimensions().TotalSize()), tileSize, range, GRange);
// run the kernel
cgh.parallel_for<PlaceHolderExpr>( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) {
typedef typename internal::ConvertToDeviceExpression<Expr>::Type DevExpr;
auto device_expr =internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
auto device_evaluator = Eigen::TensorEvaluator<decltype(device_expr.expr), Eigen::DefaultDevice>(device_expr.expr, Eigen::DefaultDevice());
- if (itemID.get_global_linear_id() < range) {
- device_evaluator.evalScalar(static_cast<typename DevExpr::Index>(itemID.get_global_linear_id()));
+ typename DevExpr::Index gId = static_cast<typename DevExpr::Index>(itemID.get_global_linear_id());
+ if (gId < range) {
+ device_evaluator.evalScalar(gId);
}
});
});
diff --git a/unsupported/test/cxx11_tensor_broadcast_sycl.cpp b/unsupported/test/cxx11_tensor_broadcast_sycl.cpp
index 3dbb8d553..752a61f8a 100644
--- a/unsupported/test/cxx11_tensor_broadcast_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_broadcast_sycl.cpp
@@ -137,14 +137,20 @@ template<typename DataType> void sycl_broadcast_test_per_device(const cl::sycl::
test_broadcast_sycl_fixed<DataType, ColMajor, int>(sycl_device);
test_broadcast_sycl<DataType, ColMajor, int>(sycl_device);
- test_broadcast_sycl_fixed<DataType, RowMajor, int64_t>(sycl_device);
+
test_broadcast_sycl<DataType, RowMajor, int64_t>(sycl_device);
- test_broadcast_sycl_fixed<DataType, ColMajor, int64_t>(sycl_device);
test_broadcast_sycl<DataType, ColMajor, int64_t>(sycl_device);
+ // the folowing two test breaks the intel gpu and amd gpu driver (cannot create opencl kernel)
+ // test_broadcast_sycl_fixed<DataType, RowMajor, int64_t>(sycl_device);
+ // test_broadcast_sycl_fixed<DataType, ColMajor, int64_t>(sycl_device);
}
void test_cxx11_tensor_broadcast_sycl() {
for (const auto& device : cl::sycl::device::get_devices()) {
+ /// get_devices returns all the available opencl devices. Either use device_selector or exclude devices that computecpp does not support (AMD OpenCL for CPU )
+ auto s= device.template get_info<cl::sycl::info::device::vendor>();
+ std::transform(s.begin(), s.end(), s.begin(), ::tolower);
+ if(!device.is_cpu() || s.find("amd")==std::string::npos)
CALL_SUBTEST(sycl_broadcast_test_per_device<float>(device));
}
}
diff --git a/unsupported/test/cxx11_tensor_builtins_sycl.cpp b/unsupported/test/cxx11_tensor_builtins_sycl.cpp
index 989b335b2..dd739f470 100644
--- a/unsupported/test/cxx11_tensor_builtins_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_builtins_sycl.cpp
@@ -264,9 +264,15 @@ static void test_builtin_binary_sycl(const Eigen::SyclDevice &sycl_device) {
}
void test_cxx11_tensor_builtins_sycl() {
- cl::sycl::gpu_selector s;
- QueueInterface queueInterface(s);
- Eigen::SyclDevice sycl_device(&queueInterface);
- CALL_SUBTEST(test_builtin_unary_sycl(sycl_device));
- CALL_SUBTEST(test_builtin_binary_sycl(sycl_device));
+ for (const auto& device : cl::sycl::device::get_devices()) {
+ /// get_devices returns all the available opencl devices. Either use device_selector or exclude devices that computecpp does not support (AMD OpenCL for CPU )
+ auto s= device.template get_info<cl::sycl::info::device::vendor>();
+ std::transform(s.begin(), s.end(), s.begin(), ::tolower);
+ if(!device.is_cpu() || s.find("amd")==std::string::npos){
+ QueueInterface queueInterface(device);
+ Eigen::SyclDevice sycl_device(&queueInterface);
+ CALL_SUBTEST(test_builtin_unary_sycl(sycl_device));
+ CALL_SUBTEST(test_builtin_binary_sycl(sycl_device));
+ }
+ }
}
diff --git a/unsupported/test/cxx11_tensor_device_sycl.cpp b/unsupported/test/cxx11_tensor_device_sycl.cpp
index 9e13d2f1b..7f9372c04 100644
--- a/unsupported/test/cxx11_tensor_device_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_device_sycl.cpp
@@ -72,6 +72,10 @@ template<typename DataType> void sycl_device_test_per_device(const cl::sycl::dev
void test_cxx11_tensor_device_sycl() {
for (const auto& device : cl::sycl::device::get_devices()) {
- CALL_SUBTEST(sycl_device_test_per_device<float>(device));
+ /// get_devices returns all the available opencl devices. Either use device_selector or exclude devices that computecpp does not support (AMD OpenCL for CPU )
+ auto s= device.template get_info<cl::sycl::info::device::vendor>();
+ std::transform(s.begin(), s.end(), s.begin(), ::tolower);
+ if(!device.is_cpu() || s.find("amd")==std::string::npos)
+ CALL_SUBTEST(sycl_device_test_per_device<float>(device));
}
}
diff --git a/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp b/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp
index 70b182558..4ff218cb6 100644
--- a/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_forced_eval_sycl.cpp
@@ -70,12 +70,11 @@ template <typename DataType, typename Dev_selector> void tensorForced_evalperDev
test_forced_eval_sycl<DataType, ColMajor>(sycl_device);
}
void test_cxx11_tensor_forced_eval_sycl() {
-
- printf("Test on GPU: OpenCL\n");
- CALL_SUBTEST(tensorForced_evalperDevice<float>((cl::sycl::gpu_selector())));
- printf("repeating the test on CPU: OpenCL\n");
- CALL_SUBTEST(tensorForced_evalperDevice<float>((cl::sycl::cpu_selector())));
- printf("repeating the test on CPU: HOST\n");
- CALL_SUBTEST(tensorForced_evalperDevice<float>((cl::sycl::host_selector())));
- printf("Test Passed******************\n" );
+ for (const auto& device : cl::sycl::device::get_devices()) {
+ /// get_devices returns all the available opencl devices. Either use device_selector or exclude devices that computecpp does not support (AMD OpenCL for CPU )
+ auto s= device.template get_info<cl::sycl::info::device::vendor>();
+ std::transform(s.begin(), s.end(), s.begin(), ::tolower);
+ if(!device.is_cpu() || s.find("amd")==std::string::npos)
+ CALL_SUBTEST(tensorForced_evalperDevice<float>(device));
+ }
}
diff --git a/unsupported/test/cxx11_tensor_morphing_sycl.cpp b/unsupported/test/cxx11_tensor_morphing_sycl.cpp
index a16e1caf5..4ca73ea1b 100644
--- a/unsupported/test/cxx11_tensor_morphing_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_morphing_sycl.cpp
@@ -82,14 +82,12 @@ template<typename DataType, typename dev_Selector> void sycl_slicing_test_per_de
}
void test_cxx11_tensor_morphing_sycl()
{
- /// Currentlly it only works on cpu. Adding GPU cause LLVM ERROR in cunstructing OpenCL Kernel at runtime.
-// printf("Test on GPU: OpenCL\n");
-// CALL_SUBTEST(sycl_device_test_per_device((cl::sycl::gpu_selector())));
- printf("repeating the test on CPU: OpenCL\n");
- CALL_SUBTEST(sycl_slicing_test_per_device<float>((cl::sycl::cpu_selector())));
- printf("repeating the test on CPU: HOST\n");
- CALL_SUBTEST(sycl_slicing_test_per_device<float>((cl::sycl::host_selector())));
- printf("Test Passed******************\n" );
-
-
+ for (const auto& device : cl::sycl::device::get_devices()) {
+ /// get_devices returns all the available opencl devices. Either use device_selector or exclude devices that computecpp does not support (AMD OpenCL for CPU )
+ /// Currentlly it only works on cpu. Adding GPU cause LLVM ERROR in cunstructing OpenCL Kernel at runtime.
+ auto s= device.template get_info<cl::sycl::info::device::vendor>();
+ std::transform(s.begin(), s.end(), s.begin(), ::tolower);
+ if(device.is_cpu() && s.find("amd")==std::string::npos)
+ CALL_SUBTEST(sycl_slicing_test_per_device<float>(device));
+ }
}
diff --git a/unsupported/test/cxx11_tensor_reduction_sycl.cpp b/unsupported/test/cxx11_tensor_reduction_sycl.cpp
index 9e20f9cd0..32cfb94c2 100644
--- a/unsupported/test/cxx11_tensor_reduction_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_reduction_sycl.cpp
@@ -142,6 +142,10 @@ template<typename DataType> void sycl_reduction_test_per_device(const cl::sycl::
}
void test_cxx11_tensor_reduction_sycl() {
for (const auto& device : cl::sycl::device::get_devices()) {
- CALL_SUBTEST(sycl_reduction_test_per_device<float>(device));
+ /// get_devices returns all the available opencl devices. Either use device_selector or exclude devices that computecpp does not support (AMD OpenCL for CPU )
+ auto s= device.template get_info<cl::sycl::info::device::vendor>();
+ std::transform(s.begin(), s.end(), s.begin(), ::tolower);
+ if(!device.is_cpu() || s.find("amd")==std::string::npos)
+ CALL_SUBTEST(sycl_reduction_test_per_device<float>(device));
}
}
diff --git a/unsupported/test/cxx11_tensor_sycl.cpp b/unsupported/test/cxx11_tensor_sycl.cpp
index bf115d652..670b5f379 100644
--- a/unsupported/test/cxx11_tensor_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_sycl.cpp
@@ -197,11 +197,11 @@ template<typename DataType, typename dev_Selector> void sycl_computing_test_per_
test_sycl_computations<DataType, ColMajor>(sycl_device);
}
void test_cxx11_tensor_sycl() {
- printf("Test on GPU: OpenCL\n");
- CALL_SUBTEST(sycl_computing_test_per_device<float>((cl::sycl::gpu_selector())));
- printf("repeating the test on CPU: OpenCL\n");
- CALL_SUBTEST(sycl_computing_test_per_device<float>((cl::sycl::cpu_selector())));
- printf("repeating the test on CPU: HOST\n");
- CALL_SUBTEST(sycl_computing_test_per_device<float>((cl::sycl::host_selector())));
- printf("Test Passed******************\n" );
+ for (const auto& device : cl::sycl::device::get_devices()) {
+ /// get_devices returns all the available opencl devices. Either use device_selector or exclude devices that computecpp does not support (AMD OpenCL for CPU )
+ auto s= device.template get_info<cl::sycl::info::device::vendor>();
+ std::transform(s.begin(), s.end(), s.begin(), ::tolower);
+ if(!device.is_cpu() || s.find("amd")==std::string::npos)
+ CALL_SUBTEST(sycl_computing_test_per_device<float>(device));
+ }
}