Adding TensorShuffling backend for sycl; adding TensorReshaping backend for sycl; cleaning up the sycl backend.

author: Mehdi Goli <mehdi.goli@codeplay.com> 2016-11-29 15:30:42 +0000
committer: Mehdi Goli <mehdi.goli@codeplay.com> 2016-11-29 15:30:42 +0000
commit: 577ce78085d2e09675abb5976ab3026235de8eec (patch)
tree: b88f8db6290c625fd35a72594e816b8ff4094e15 /unsupported/test
parent: 02080e2b673c17302872a05e0fac8c20ac756b44 (diff)
4 files changed, 233 insertions, 3 deletions
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 471826746..0ffa329f5 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -147,6 +147,7 @@ if(EIGEN_TEST_CXX11)
     ei_add_test_sycl(cxx11_tensor_device_sycl "-std=c++11")
     ei_add_test_sycl(cxx11_tensor_reduction_sycl "-std=c++11")
     ei_add_test_sycl(cxx11_tensor_morphing_sycl "-std=c++11")
+    ei_add_test_sycl(cxx11_tensor_shuffling_sycl "-std=c++11")
     ei_add_test_sycl(cxx11_tensor_builtins_sycl "-std=c++11")
   endif(EIGEN_TEST_SYCL)
   # It should be safe to always run these tests as there is some fallback code for
diff --git a/unsupported/test/cxx11_tensor_morphing_sycl.cpp b/unsupported/test/cxx11_tensor_morphing_sycl.cpp
index 9074c8331..d7f4e8cff 100644
--- a/unsupported/test/cxx11_tensor_morphing_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_morphing_sycl.cpp
@@ -29,6 +29,112 @@ using Eigen::Tensor;
 using Eigen::TensorMap;
 
 template <typename DataType, int DataLayout>
+static void test_simple_reshape(const Eigen::SyclDevice& sycl_device)
+{
+  typename Tensor<DataType, 5 ,DataLayout>::Dimensions dim1(2,3,1,7,1);
+  typename Tensor<DataType, 3 ,DataLayout>::Dimensions dim2(2,3,7);
+  typename Tensor<DataType, 2 ,DataLayout>::Dimensions dim3(6,7);
+  typename Tensor<DataType, 2 ,DataLayout>::Dimensions dim4(2,21);
+
+  Tensor<DataType, 5, DataLayout> tensor1(dim1);
+  Tensor<DataType, 3, DataLayout> tensor2(dim2);
+  Tensor<DataType, 2, DataLayout> tensor3(dim3);
+  Tensor<DataType, 2, DataLayout> tensor4(dim4);
+
+  tensor1.setRandom();
+
+  DataType* gpu_data1  = static_cast<DataType*>(sycl_device.allocate(tensor1.size()*sizeof(DataType)));
+  DataType* gpu_data2  = static_cast<DataType*>(sycl_device.allocate(tensor2.size()*sizeof(DataType)));
+  DataType* gpu_data3  = static_cast<DataType*>(sycl_device.allocate(tensor3.size()*sizeof(DataType)));
+  DataType* gpu_data4  = static_cast<DataType*>(sycl_device.allocate(tensor4.size()*sizeof(DataType)));
+
+  TensorMap<Tensor<DataType, 5,DataLayout>> gpu1(gpu_data1, dim1);
+  TensorMap<Tensor<DataType, 3,DataLayout>> gpu2(gpu_data2, dim2);
+  TensorMap<Tensor<DataType, 2,DataLayout>> gpu3(gpu_data3, dim3);
+  TensorMap<Tensor<DataType, 2,DataLayout>> gpu4(gpu_data4, dim4);
+
+  sycl_device.memcpyHostToDevice(gpu_data1, tensor1.data(),(tensor1.size())*sizeof(DataType));
+
+  gpu2.device(sycl_device)=gpu1.reshape(dim2);
+  sycl_device.memcpyDeviceToHost(tensor2.data(), gpu_data2,(tensor1.size())*sizeof(DataType));
+
+  gpu3.device(sycl_device)=gpu1.reshape(dim3);
+  sycl_device.memcpyDeviceToHost(tensor3.data(), gpu_data3,(tensor3.size())*sizeof(DataType));
+
+  gpu4.device(sycl_device)=gpu1.reshape(dim2).reshape(dim4);
+  sycl_device.memcpyDeviceToHost(tensor4.data(), gpu_data4,(tensor4.size())*sizeof(DataType));
+  for (int i = 0; i < 2; ++i){
+    for (int j = 0; j < 3; ++j){
+      for (int k = 0; k < 7; ++k){
+        VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor2(i,j,k));      ///ColMajor
+        if (static_cast<int>(DataLayout) == static_cast<int>(ColMajor)) {
+          VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i+2*j,k));    ///ColMajor
+          VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j+3*k));    ///ColMajor
+        }
+        else{
+          //VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor2(i,j,k));      /// RowMajor
+          VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor4(i,j*7 +k));   /// RowMajor
+          VERIFY_IS_EQUAL(tensor1(i,j,0,k,0), tensor3(i*3 +j,k));   /// RowMajor
+        }
+      }
+    }
+  }
+  sycl_device.deallocate(gpu_data1);
+  sycl_device.deallocate(gpu_data2);
+  sycl_device.deallocate(gpu_data3);
+  sycl_device.deallocate(gpu_data4);
+}
+
+
+// Exercises reshape() as an lvalue on the SYCL device: a 2x3x7 source is assigned through
+// 2x3x7 views of a 6x7 and of a 2x3x1x7x1 destination, and both are checked on the host.
+template<typename DataType, int DataLayout>
+static void test_reshape_as_lvalue(const Eigen::SyclDevice& sycl_device)
+{
+  typename Tensor<DataType, 3, DataLayout>::Dimensions shape3d(2,3,7);
+  typename Tensor<DataType, 2, DataLayout>::Dimensions shape2d(6,7);
+  typename Tensor<DataType, 5, DataLayout>::Dimensions shape5d(2,3,1,7,1);
+  Tensor<DataType, 3, DataLayout> tensor(shape3d);
+  Tensor<DataType, 2, DataLayout> tensor2d(shape2d);
+  Tensor<DataType, 5, DataLayout> tensor5d(shape5d);
+  tensor.setRandom();
+
+  const size_t bytes3d = tensor.size()*sizeof(DataType);
+  const size_t bytes2d = tensor2d.size()*sizeof(DataType);
+  const size_t bytes5d = tensor5d.size()*sizeof(DataType);
+  DataType* src_buf = static_cast<DataType*>(sycl_device.allocate(bytes3d));
+  DataType* buf2d   = static_cast<DataType*>(sycl_device.allocate(bytes2d));
+  DataType* buf5d   = static_cast<DataType*>(sycl_device.allocate(bytes5d));
+
+  TensorMap< Tensor<DataType, 3, DataLayout> > src_map(src_buf, shape3d);
+  TensorMap< Tensor<DataType, 2, DataLayout> > map2d(buf2d, shape2d);
+  TensorMap< Tensor<DataType, 5, DataLayout> > map5d(buf5d, shape5d);
+  sycl_device.memcpyHostToDevice(src_buf, tensor.data(), bytes3d);
+
+  map2d.reshape(shape3d).device(sycl_device)=src_map;
+  sycl_device.memcpyDeviceToHost(tensor2d.data(), buf2d, bytes2d);
+  map5d.reshape(shape3d).device(sycl_device)=src_map;
+  sycl_device.memcpyDeviceToHost(tensor5d.data(), buf5d, bytes5d);
+
+  for (int i = 0; i < 2; ++i){
+    for (int j = 0; j < 3; ++j){
+      for (int k = 0; k < 7; ++k){
+        VERIFY_IS_EQUAL(tensor5d(i,j,0,k,0), tensor(i,j,k));
+        if (static_cast<int>(DataLayout) != static_cast<int>(ColMajor)) {
+          VERIFY_IS_EQUAL(tensor2d(i*3 +j,k),tensor(i,j,k));   // RowMajor: i and j fold into row i*3+j
+        } else {
+          VERIFY_IS_EQUAL(tensor2d(i+2*j,k), tensor(i,j,k));    // ColMajor: i and j fold into row i+2*j
+        }
+      }
+    }
+  }
+  sycl_device.deallocate(src_buf);
+  sycl_device.deallocate(buf2d);
+  sycl_device.deallocate(buf5d);
+}
+
+
+template <typename DataType, int DataLayout>
 static void test_simple_slice(const Eigen::SyclDevice &sycl_device)
 {
   int sizeDim1 = 2;
@@ -74,15 +180,19 @@ static void test_simple_slice(const Eigen::SyclDevice &sycl_device)
   sycl_device.deallocate(gpu_data3);
 }
 
-template<typename DataType, typename dev_Selector> void sycl_slicing_test_per_device(dev_Selector s){
+template<typename DataType, typename dev_Selector> void sycl_morphing_test_per_device(dev_Selector s){  // runs the slice and reshape tests, in both layouts, on one device
   QueueInterface queueInterface(s);
   auto sycl_device = Eigen::SyclDevice(&queueInterface);
   test_simple_slice<DataType, RowMajor>(sycl_device);
   test_simple_slice<DataType, ColMajor>(sycl_device);
+  test_simple_reshape<DataType, RowMajor>(sycl_device);  // reshape used as an rvalue
+  test_simple_reshape<DataType, ColMajor>(sycl_device);
+  test_reshape_as_lvalue<DataType, RowMajor>(sycl_device);  // reshape used as an assignment target
+  test_reshape_as_lvalue<DataType, ColMajor>(sycl_device);
 }
 void test_cxx11_tensor_morphing_sycl()
 {
   for (const auto& device :Eigen::get_sycl_supported_devices()) {
-    CALL_SUBTEST(sycl_slicing_test_per_device<float>(device));
+    CALL_SUBTEST(sycl_morphing_test_per_device<float>(device));  // one float subtest per device returned by get_sycl_supported_devices()
   }
 }
diff --git a/unsupported/test/cxx11_tensor_shuffling_sycl.cpp b/unsupported/test/cxx11_tensor_shuffling_sycl.cpp
new file mode 100644
index 000000000..b2b75cbde
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_shuffling_sycl.cpp
@@ -0,0 +1,120 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016
+// Mehdi Goli    Codeplay Software Ltd.
+// Ralph Potter  Codeplay Software Ltd.
+// Luke Iwanski  Codeplay Software Ltd.
+// Contact: <eigen@codeplay.com>
+// Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_shuffling_sycl
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_SYCL
+
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::array;
+using Eigen::SyclDevice;
+using Eigen::Tensor;
+using Eigen::TensorMap;
+
+// Checks shuffle() on the SYCL device for a 2x3x5x7 tensor: first with the identity
+// permutation, then with {2,3,1,0}, comparing each result with the host tensor.
+template <typename DataType, int DataLayout, typename IndexTypes>
+static void test_simple_shuffling_sycl(const Eigen::SyclDevice& sycl_device)
+{
+  IndexTypes sizeDim1 = 2;
+  IndexTypes sizeDim2 = 3;
+  IndexTypes sizeDim3 = 5;
+  IndexTypes sizeDim4 = 7;
+  array<IndexTypes, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};
+  Tensor<DataType, 4, DataLayout,IndexTypes> tensor(tensorRange);
+  Tensor<DataType, 4, DataLayout,IndexTypes> no_shuffle(tensorRange);
+  tensor.setRandom();
+
+  const size_t buffSize =tensor.size()*sizeof(DataType);
+  array<IndexTypes, 4> shuffles = {{0, 1, 2, 3}};  // identity permutation: output must equal input
+  DataType* gpu_data1  = static_cast<DataType*>(sycl_device.allocate(buffSize));
+  DataType* gpu_data2  = static_cast<DataType*>(sycl_device.allocate(buffSize));
+
+  TensorMap<Tensor<DataType, 4, DataLayout,IndexTypes>> gpu1(gpu_data1, tensorRange);
+  TensorMap<Tensor<DataType, 4, DataLayout,IndexTypes>> gpu2(gpu_data2, tensorRange);
+  sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(), buffSize);
+
+  gpu2.device(sycl_device)=gpu1.shuffle(shuffles);
+  sycl_device.memcpyDeviceToHost(no_shuffle.data(), gpu_data2, buffSize);
+
+  VERIFY_IS_EQUAL(no_shuffle.dimension(0), sizeDim1);
+  VERIFY_IS_EQUAL(no_shuffle.dimension(1), sizeDim2);
+  VERIFY_IS_EQUAL(no_shuffle.dimension(2), sizeDim3);
+  VERIFY_IS_EQUAL(no_shuffle.dimension(3), sizeDim4);
+
+  for (IndexTypes i = 0; i < sizeDim1; ++i) {
+    for (IndexTypes j = 0; j < sizeDim2; ++j) {
+      for (IndexTypes k = 0; k < sizeDim3; ++k) {
+        for (IndexTypes l = 0; l < sizeDim4; ++l) {
+          VERIFY_IS_EQUAL(tensor(i,j,k,l), no_shuffle(i,j,k,l));
+        }
+      }
+    }
+  }
+
+  shuffles[0] = 2;
+  shuffles[1] = 3;
+  shuffles[2] = 1;
+  shuffles[3] = 0;
+  array<IndexTypes, 4> tensorrangeShuffle = {{sizeDim3, sizeDim4, sizeDim2, sizeDim1}};  // input dims permuted by shuffles
+  Tensor<DataType, 4, DataLayout,IndexTypes> shuffle(tensorrangeShuffle);
+  DataType* gpu_data3  = static_cast<DataType*>(sycl_device.allocate(buffSize));
+  TensorMap<Tensor<DataType, 4,DataLayout,IndexTypes>> gpu3(gpu_data3, tensorrangeShuffle);
+
+  gpu3.device(sycl_device)=gpu1.shuffle(shuffles);
+  sycl_device.memcpyDeviceToHost(shuffle.data(), gpu_data3, buffSize);
+
+  VERIFY_IS_EQUAL(shuffle.dimension(0), sizeDim3);
+  VERIFY_IS_EQUAL(shuffle.dimension(1), sizeDim4);
+  VERIFY_IS_EQUAL(shuffle.dimension(2), sizeDim2);
+  VERIFY_IS_EQUAL(shuffle.dimension(3), sizeDim1);
+
+  for (IndexTypes i = 0; i < sizeDim1; ++i) {
+    for (IndexTypes j = 0; j < sizeDim2; ++j) {
+      for (IndexTypes k = 0; k < sizeDim3; ++k) {
+        for (IndexTypes l = 0; l < sizeDim4; ++l) {
+          VERIFY_IS_EQUAL(tensor(i,j,k,l), shuffle(k,l,j,i));
+        }
+      }
+    }
+  }
+  // Release the three device buffers; the test previously returned without freeing them.
+  sycl_device.deallocate(gpu_data1);
+  sycl_device.deallocate(gpu_data2);
+  sycl_device.deallocate(gpu_data3);
+}
+
+
+// Creates a SyclDevice from the selector and runs the shuffling test for both layouts and both index types.
+template<typename DataType, typename dev_Selector>
+void sycl_shuffling_test_per_device(dev_Selector s){
+  QueueInterface queue(s);
+  Eigen::SyclDevice device(&queue);
+  test_simple_shuffling_sycl<DataType, RowMajor, int>(device);
+  test_simple_shuffling_sycl<DataType, ColMajor, int>(device);
+  test_simple_shuffling_sycl<DataType, RowMajor, int64_t>(device);
+  test_simple_shuffling_sycl<DataType, ColMajor, int64_t>(device);
+}
+// Test entry point: runs the float shuffling subtests once per device from get_sycl_supported_devices().
+void test_cxx11_tensor_shuffling_sycl() {
+  for (const auto& device : Eigen::get_sycl_supported_devices()) {
+    CALL_SUBTEST(sycl_shuffling_test_per_device<float>(device));
+  }
+}
diff --git a/unsupported/test/cxx11_tensor_sycl.cpp b/unsupported/test/cxx11_tensor_sycl.cpp
index 150414f15..4e17a7328 100644
--- a/unsupported/test/cxx11_tensor_sycl.cpp
+++ b/unsupported/test/cxx11_tensor_sycl.cpp
@@ -197,7 +197,6 @@ template<typename DataType, typename dev_Selector> void sycl_computing_test_per_
   test_sycl_computations<DataType, ColMajor>(sycl_device);
 }
 void test_cxx11_tensor_sycl() {
-  auto devices =Eigen::get_sycl_supported_devices();
   for (const auto& device :Eigen::get_sycl_supported_devices()) {
     CALL_SUBTEST(sycl_computing_test_per_device<float>(device));
   }
author	Mehdi Goli <mehdi.goli@codeplay.com>	2016-11-29 15:30:42 +0000
committer	Mehdi Goli <mehdi.goli@codeplay.com>	2016-11-29 15:30:42 +0000
commit	577ce78085d2e09675abb5976ab3026235de8eec (patch)
tree	b88f8db6290c625fd35a72594e816b8ff4094e15 /unsupported/test
parent	02080e2b673c17302872a05e0fac8c20ac756b44 (diff)