Diffstat (limited to 'unsupported/test/cxx11_tensor_reduction_hip.cu')
-rw-r--r-- unsupported/test/cxx11_tensor_reduction_hip.cu | 154
1 file changed, 0 insertions, 154 deletions
diff --git a/unsupported/test/cxx11_tensor_reduction_hip.cu b/unsupported/test/cxx11_tensor_reduction_hip.cu
deleted file mode 100644
index c5aad05be..000000000
--- a/unsupported/test/cxx11_tensor_reduction_hip.cu
+++ /dev/null
@@ -1,154 +0,0 @@
-// This file is part of Eigen, a lightweight C++ template library
-// for linear algebra.
-//
-// Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
-//
-// This Source Code Form is subject to the terms of the Mozilla
-// Public License v. 2.0. If a copy of the MPL was not distributed
-// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
-
-#define EIGEN_TEST_NO_LONGDOUBLE
-#define EIGEN_TEST_NO_COMPLEX
-#define EIGEN_TEST_FUNC cxx11_tensor_reduction_hip
-#define EIGEN_USE_GPU
-
-#include "main.h"
-#include <unsupported/Eigen/CXX11/Tensor>
-
-
-template<typename Type, int DataLayout>
-static void test_full_reductions() {
-
- Eigen::HipStreamDevice stream;
- Eigen::GpuDevice gpu_device(&stream);
-
- const int num_rows = internal::random<int>(1024, 5*1024);
- const int num_cols = internal::random<int>(1024, 5*1024);
-
- Tensor<Type, 2, DataLayout> in(num_rows, num_cols);
- in.setRandom();
-
- Tensor<Type, 0, DataLayout> full_redux;
- full_redux = in.sum();
-
- std::size_t in_bytes = in.size() * sizeof(Type);
- std::size_t out_bytes = full_redux.size() * sizeof(Type);
- Type* gpu_in_ptr = static_cast<Type*>(gpu_device.allocate(in_bytes));
- Type* gpu_out_ptr = static_cast<Type*>(gpu_device.allocate(out_bytes));
- gpu_device.memcpyHostToDevice(gpu_in_ptr, in.data(), in_bytes);
-
- TensorMap<Tensor<Type, 2, DataLayout> > in_gpu(gpu_in_ptr, num_rows, num_cols);
- TensorMap<Tensor<Type, 0, DataLayout> > out_gpu(gpu_out_ptr);
-
- out_gpu.device(gpu_device) = in_gpu.sum();
-
- Tensor<Type, 0, DataLayout> full_redux_gpu;
- gpu_device.memcpyDeviceToHost(full_redux_gpu.data(), gpu_out_ptr, out_bytes);
- gpu_device.synchronize();
-
- // Check that the CPU and GPU reductions return the same result.
- VERIFY_IS_APPROX(full_redux(), full_redux_gpu());
-
- gpu_device.deallocate(gpu_in_ptr);
- gpu_device.deallocate(gpu_out_ptr);
-}
-
-template<typename Type, int DataLayout>
-static void test_first_dim_reductions() {
- int dim_x = 33;
- int dim_y = 1;
- int dim_z = 128;
-
- Tensor<Type, 3, DataLayout> in(dim_x, dim_y, dim_z);
- in.setRandom();
-
- Eigen::array<int, 1> red_axis;
- red_axis[0] = 0;
- Tensor<Type, 2, DataLayout> redux = in.sum(red_axis);
-
- // Create device
- Eigen::HipStreamDevice stream;
- Eigen::GpuDevice dev(&stream);
-
- // Create data
- Type* in_data = (Type*)dev.allocate(dim_x*dim_y*dim_z*sizeof(Type));
- Type* out_data = (Type*)dev.allocate(dim_z*dim_y*sizeof(Type));
- Eigen::TensorMap<Eigen::Tensor<Type, 3, DataLayout> > gpu_in(in_data, dim_x, dim_y, dim_z);
- Eigen::TensorMap<Eigen::Tensor<Type, 2, DataLayout> > gpu_out(out_data, dim_y, dim_z);
-
- // Perform operation
- dev.memcpyHostToDevice(in_data, in.data(), in.size()*sizeof(Type));
- gpu_out.device(dev) = gpu_in.sum(red_axis);
- gpu_out.device(dev) += gpu_in.sum(red_axis);
- Tensor<Type, 2, DataLayout> redux_gpu(dim_y, dim_z);
- dev.memcpyDeviceToHost(redux_gpu.data(), out_data, gpu_out.size()*sizeof(Type));
- dev.synchronize();
-
- // Check that the CPU and GPU reductions return the same result.
- for (int i = 0; i < gpu_out.size(); ++i) {
- VERIFY_IS_APPROX(2*redux(i), redux_gpu(i));
- }
-
- dev.deallocate(in_data);
- dev.deallocate(out_data);
-}
-
-template<typename Type, int DataLayout>
-static void test_last_dim_reductions() {
- int dim_x = 128;
- int dim_y = 1;
- int dim_z = 33;
-
- Tensor<Type, 3, DataLayout> in(dim_x, dim_y, dim_z);
- in.setRandom();
-
- Eigen::array<int, 1> red_axis;
- red_axis[0] = 2;
- Tensor<Type, 2, DataLayout> redux = in.sum(red_axis);
-
- // Create device
- Eigen::HipStreamDevice stream;
- Eigen::GpuDevice dev(&stream);
-
- // Create data
- Type* in_data = (Type*)dev.allocate(dim_x*dim_y*dim_z*sizeof(Type));
- Type* out_data = (Type*)dev.allocate(dim_x*dim_y*sizeof(Type));
- Eigen::TensorMap<Eigen::Tensor<Type, 3, DataLayout> > gpu_in(in_data, dim_x, dim_y, dim_z);
- Eigen::TensorMap<Eigen::Tensor<Type, 2, DataLayout> > gpu_out(out_data, dim_x, dim_y);
-
- // Perform operation
- dev.memcpyHostToDevice(in_data, in.data(), in.size()*sizeof(Type));
- gpu_out.device(dev) = gpu_in.sum(red_axis);
- gpu_out.device(dev) += gpu_in.sum(red_axis);
- Tensor<Type, 2, DataLayout> redux_gpu(dim_x, dim_y);
- dev.memcpyDeviceToHost(redux_gpu.data(), out_data, gpu_out.size()*sizeof(Type));
- dev.synchronize();
-
- // Check that the CPU and GPU reductions return the same result.
- for (int i = 0; i < gpu_out.size(); ++i) {
- VERIFY_IS_APPROX(2*redux(i), redux_gpu(i));
- }
-
- dev.deallocate(in_data);
- dev.deallocate(out_data);
-}
-
-
-void test_cxx11_tensor_reduction_hip() {
- CALL_SUBTEST((test_full_reductions<float, ColMajor>()));
- CALL_SUBTEST((test_full_reductions<double, ColMajor>()));
- CALL_SUBTEST((test_full_reductions<float, RowMajor>()));
- CALL_SUBTEST((test_full_reductions<double, RowMajor>()));
-
- CALL_SUBTEST((test_first_dim_reductions<float, ColMajor>()));
- CALL_SUBTEST((test_first_dim_reductions<double, ColMajor>()));
- CALL_SUBTEST((test_first_dim_reductions<float, RowMajor>()));
-// Outer reductions of doubles aren't supported just yet.
-// CALL_SUBTEST((test_first_dim_reductions<double, RowMajor>()))
-
- CALL_SUBTEST((test_last_dim_reductions<float, ColMajor>()));
-// Outer reductions of doubles aren't supported just yet.
-// CALL_SUBTEST((test_last_dim_reductions<double, ColMajor>()));
- CALL_SUBTEST((test_last_dim_reductions<float, RowMajor>()));
- CALL_SUBTEST((test_last_dim_reductions<double, RowMajor>()));
-}
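For context, the deleted test exercised Eigen's Tensor reduction path through the HIP-specific Eigen::HipStreamDevice. A minimal standalone sketch of the same full-reduction round trip is shown below; Eigen::GpuStreamDevice is an assumption here (the device-wrapper name in Eigen trees where the CUDA and HIP backends were unified), while the allocate/memcpy/sum/synchronize sequence mirrors the test above.

// Minimal sketch, assuming a post-unification Eigen tree where
// Eigen::GpuStreamDevice exists; compile with nvcc or hipcc.
#define EIGEN_USE_GPU
#include <cstddef>
#include <unsupported/Eigen/CXX11/Tensor>

static float full_sum_on_gpu(const Eigen::Tensor<float, 2>& in) {
  Eigen::GpuStreamDevice stream;
  Eigen::GpuDevice dev(&stream);

  // Stage the input on the device.
  const std::size_t in_bytes = in.size() * sizeof(float);
  float* gpu_in  = static_cast<float*>(dev.allocate(in_bytes));
  float* gpu_out = static_cast<float*>(dev.allocate(sizeof(float)));
  dev.memcpyHostToDevice(gpu_in, in.data(), in_bytes);

  Eigen::TensorMap<Eigen::Tensor<float, 2> > in_gpu(gpu_in, in.dimension(0), in.dimension(1));
  Eigen::TensorMap<Eigen::Tensor<float, 0> > out_gpu(gpu_out);

  // Assigning through .device(dev) launches the reduction kernel
  // on the wrapped stream.
  out_gpu.device(dev) = in_gpu.sum();

  // Copy the rank-0 result back and wait for the stream to drain.
  Eigen::Tensor<float, 0> result;
  dev.memcpyDeviceToHost(result.data(), gpu_out, sizeof(float));
  dev.synchronize();

  dev.deallocate(gpu_in);
  dev.deallocate(gpu_out);
  return result();
}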