aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-03-03 10:37:25 -0800
committerGravatar Benoit Steiner <benoit.steiner.goog@gmail.com>2016-03-03 10:37:25 -0800
commitdac58d7c3599a1c5c7631a734ca95a60ddb549ef (patch)
treebc915db94ab96b20dd30501af3b2954e77278e21
parent1032441c6fea0a0d98b394abe8ffdb228256f47b (diff)
Added a test to validate the conversion of half floats into floats on Kepler GPUs.
Restricted the testing of the random number generation code to GPU architecture greater than or equal to 3.5.
-rw-r--r--unsupported/test/CMakeLists.txt9
-rw-r--r--unsupported/test/cxx11_tensor_cast_float16_cuda.cu73
2 files changed, 80 insertions, 2 deletions
diff --git a/unsupported/test/CMakeLists.txt b/unsupported/test/CMakeLists.txt
index 3be43f47f..bc9248b9e 100644
--- a/unsupported/test/CMakeLists.txt
+++ b/unsupported/test/CMakeLists.txt
@@ -170,10 +170,15 @@ if(CUDA_FOUND)
ei_add_test(cxx11_tensor_cuda)
ei_add_test(cxx11_tensor_contract_cuda)
ei_add_test(cxx11_tensor_reduction_cuda)
- ei_add_test(cxx11_tensor_random_cuda)
ei_add_test(cxx11_tensor_argmax_cuda)
+ ei_add_test(cxx11_tensor_cast_float16_cuda)
- # Half floats are only supported starting with arch 5.3
+ # The random number generation code requires arch 3.5 or greater.
+ if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 34)
+ ei_add_test(cxx11_tensor_random_cuda)
+ endif()
+
+ # Operations other than casting of half floats are only supported starting with arch 5.3
if (${EIGEN_CUDA_COMPUTE_ARCH} GREATER 52)
ei_add_test(cxx11_tensor_of_float16_cuda)
endif()
diff --git a/unsupported/test/cxx11_tensor_cast_float16_cuda.cu b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu
new file mode 100644
index 000000000..7936a9126
--- /dev/null
+++ b/unsupported/test/cxx11_tensor_cast_float16_cuda.cu
@@ -0,0 +1,73 @@
+// This file is part of Eigen, a lightweight C++ template library
+// for linear algebra.
+//
+// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
+//
+// This Source Code Form is subject to the terms of the Mozilla
+// Public License v. 2.0. If a copy of the MPL was not distributed
+// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#define EIGEN_TEST_NO_LONGDOUBLE
+#define EIGEN_TEST_NO_COMPLEX
+#define EIGEN_TEST_FUNC cxx11_tensor_cast_float16_cuda
+#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
+#define EIGEN_USE_GPU
+
+
+#include "main.h"
+#include <unsupported/Eigen/CXX11/Tensor>
+
+using Eigen::Tensor;
+
+#ifdef EIGEN_HAS_CUDA_FP16
+
+void test_cuda_conversion() {
+ Eigen::CudaStreamDevice stream;
+ Eigen::GpuDevice gpu_device(&stream);
+ int num_elem = 101;
+
+ Tensor<float, 1> floats(num_elem);
+ floats.setRandom();
+
+ float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
+ half* d_half = (half*)gpu_device.allocate(num_elem * sizeof(half));
+ float* d_conv = (float*)gpu_device.allocate(num_elem * sizeof(float));
+
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
+ d_float, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<half, 1>, Eigen::Aligned> gpu_half(
+ d_half, num_elem);
+ Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_conv(
+ d_conv, num_elem);
+
+ gpu_device.memcpyHostToDevice(d_float, floats.data(), num_elem*sizeof(float));
+
+ gpu_half.device(gpu_device) = gpu_float.cast<half>();
+ gpu_conv.device(gpu_device) = gpu_half.cast<float>();
+
+ Tensor<float, 1> initial(num_elem);
+ Tensor<float, 1> final(num_elem);
+ gpu_device.memcpyDeviceToHost(initial.data(), d_float, num_elem*sizeof(float));
+ gpu_device.memcpyDeviceToHost(final.data(), d_conv, num_elem*sizeof(float));
+ gpu_device.synchronize();
+
+ for (int i = 0; i < num_elem; ++i) {
+ VERIFY_IS_APPROX(initial(i), final(i));
+ }
+
+ gpu_device.deallocate(d_float);
+ gpu_device.deallocate(d_half);
+ gpu_device.deallocate(d_conv);
+}
+
+#endif
+
+
+void test_cxx11_tensor_cast_float16_cuda()
+{
+#ifdef EIGEN_HAS_CUDA_FP16
+ CALL_SUBTEST(test_cuda_conversion());
+#else
+ std::cout << "Half floats are not supported by this version of cuda: skipping the test" << std::endl;
+#endif
+}