From 6565f8d60fc0cadd1935ab2dee522ec52829e615 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 26 Sep 2016 11:00:32 -0700 Subject: Made the initialization of a CUDA device thread safe. --- .../Eigen/CXX11/src/Tensor/TensorDeviceCuda.h | 28 +++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h index 28c6f7626..4f5767bc7 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceCuda.h @@ -42,7 +42,21 @@ static bool m_devicePropInitialized = false; static void initializeDeviceProp() { if (!m_devicePropInitialized) { - if (!m_devicePropInitialized) { + // Attempts to ensure proper behavior in the case of multiple threads + // calling this function simultaneously. This would be trivial to + // implement if we could use std::mutex, but unfortunately mutex don't + // compile with nvcc, so we resort to atomics and thread fences instead. + // Note that if the caller uses a compiler that doesn't support c++11 we + // can't ensure that the initialization is thread safe. +#if __cplusplus >= 201103L + static std::atomic first(true); + if (first.exchange(false)) { +#else + static bool first = true; + if (first) { + first = false; +#endif + // We're the first thread to reach this point. int num_devices; cudaError_t status = cudaGetDeviceCount(&num_devices); if (status != cudaSuccess) { @@ -63,7 +77,19 @@ static void initializeDeviceProp() { assert(status == cudaSuccess); } } + +#if __cplusplus >= 201103L + std::atomic_thread_fence(std::memory_order_release); +#endif m_devicePropInitialized = true; + } else { + // Wait for the other thread to inititialize the properties. + while (!m_devicePropInitialized) { +#if __cplusplus >= 201103L + std::atomic_thread_fence(std::memory_order_acquire); +#endif + sleep(1); + } } } } -- cgit v1.2.3