From 3804ca0d905a0a03357db50abc7468f5f90abc98 Mon Sep 17 00:00:00 2001 From: Turing Eret Date: Fri, 23 Apr 2021 07:43:35 -0600 Subject: Fix for issue with static global variables in TensorDeviceGpu.h m_deviceProperties and m_devicePropInitialized are defined as global statics in a header, so every translation unit that includes it gets its own copy. This can cause issues if initializeDeviceProp() is called in one translation unit and then m_deviceProperties is used in a different translation unit. Added inline functions getDeviceProperties() and getDevicePropInitialized(), which define those variables as static locals. As per the C++ standard 7.1.2/4, a static local declared in an inline function always refers to the same object, so this should be safer. Credit to Sun Chenggen for this fix. This fixes issue #1475. --- .../Eigen/CXX11/src/Tensor/TensorDeviceGpu.h | 23 ++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'unsupported/Eigen') diff --git a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h index 9422dcd7a..d5eff9dc4 100644 --- a/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h +++ b/unsupported/Eigen/CXX11/src/Tensor/TensorDeviceGpu.h @@ -42,11 +42,18 @@ class StreamInterface { virtual unsigned int* semaphore() const = 0; }; -static gpuDeviceProp_t* m_deviceProperties; -static bool m_devicePropInitialized = false; +EIGEN_STRONG_INLINE gpuDeviceProp_t*& getDeviceProperties() { + static gpuDeviceProp_t* deviceProperties; + return deviceProperties; +} + +EIGEN_STRONG_INLINE bool& getDevicePropInitialized() { + static bool devicePropInitialized = false; + return devicePropInitialized; +} static void initializeDeviceProp() { - if (!m_devicePropInitialized) { + if (!getDevicePropInitialized()) { // Attempts to ensure proper behavior in the case of multiple threads // calling this function simultaneously. 
This would be trivial to // implement if we could use std::mutex, but unfortunately mutex don't @@ -64,9 +71,9 @@ static void initializeDeviceProp() { << std::endl; gpu_assert(status == gpuSuccess); } - m_deviceProperties = new gpuDeviceProp_t[num_devices]; + getDeviceProperties() = new gpuDeviceProp_t[num_devices]; for (int i = 0; i < num_devices; ++i) { - status = gpuGetDeviceProperties(&m_deviceProperties[i], i); + status = gpuGetDeviceProperties(&getDeviceProperties()[i], i); if (status != gpuSuccess) { std::cerr << "Failed to initialize GPU device #" << i @@ -78,10 +85,10 @@ static void initializeDeviceProp() { } std::atomic_thread_fence(std::memory_order_release); - m_devicePropInitialized = true; + getDevicePropInitialized() = true; } else { // Wait for the other thread to inititialize the properties. - while (!m_devicePropInitialized) { + while (!getDevicePropInitialized()) { std::atomic_thread_fence(std::memory_order_acquire); std::this_thread::sleep_for(std::chrono::milliseconds(1000)); } @@ -129,7 +136,7 @@ class GpuStreamDevice : public StreamInterface { const gpuStream_t& stream() const { return *stream_; } const gpuDeviceProp_t& deviceProperties() const { - return m_deviceProperties[device_]; + return getDeviceProperties()[device_]; } virtual void* allocate(size_t num_bytes) const { gpuError_t err = gpuSetDevice(device_); -- cgit v1.2.3