diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-05-18 06:31:20 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-05-18 06:33:41 -0700 |
commit | 2934484b3a4802c3b4644e6fc9a2b1c647d2eb9a (patch) | |
tree | d62a6167ea2bec029663cc06bda8dbebc2938f39 /tensorflow/stream_executor | |
parent | 68546a6cfd18ac1a16f6d6a1843882aea4243f55 (diff) |
Dropping support for CUDA < 8.
PiperOrigin-RevId: 197137612
Diffstat (limited to 'tensorflow/stream_executor')
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_blas.cc | 14 |
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_driver.cc | 108 |
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_driver.h | 2 |
3 files changed, 44 insertions, 80 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_blas.cc b/tensorflow/stream_executor/cuda/cuda_blas.cc index dcc3f7ac98..3e9a23c658 100644 --- a/tensorflow/stream_executor/cuda/cuda_blas.cc +++ b/tensorflow/stream_executor/cuda/cuda_blas.cc @@ -16,11 +16,7 @@ limitations under the License. #include "cuda/include/cublas_v2.h" #include "cuda/include/cuda.h" -#if CUDA_VERSION >= 8000 #define SE_CUDA_DATA_HALF CUDA_R_16F -#else -#define SE_CUDA_DATA_HALF CUBLAS_DATA_HALF -#endif #include "tensorflow/stream_executor/cuda/cuda_blas.h" @@ -45,10 +41,8 @@ limitations under the License. // approach when the issue is fixed. #if CUDA_VERSION < 9000 #include "cuda/include/cuda_fp16.h" -#if CUDA_VERSION >= 7050 #define EIGEN_HAS_CUDA_FP16 #endif -#endif #include "third_party/eigen3/Eigen/Core" @@ -543,9 +537,7 @@ cublasSideMode_t CUDABlasSide(blas::Side side) { // blas::ComputationType to a cudaDataType_t. // // These are used to build the argument type and computation type args to -// cublasGemmEx. cublasGemmEx and cudaDataType_t are available only on -// CUDA >= 8.0. -#if CUDA_VERSION >= 8000 +// cublasGemmEx. template <typename T> struct CUDADataType; @@ -620,8 +612,6 @@ cudaDataType_t CUDAComputationType(blas::ComputationType ty) { return CUDA_C_64F; } } -#endif - } // namespace template <typename FuncT, typename... Args> @@ -2229,7 +2219,6 @@ bool CUDABlas::GetBlasGemmAlgorithms( // Note that when CUDA version and compute capability is not sufficient, we // still return the out_algorithms. Caller needs to make sure that in this case, // the returned vector is empty. 
-#if CUDA_VERSION >= 8000 for (cublasGemmAlgo_t algo : { CUBLAS_GEMM_DFALT, CUBLAS_GEMM_ALGO0, CUBLAS_GEMM_ALGO1, CUBLAS_GEMM_ALGO2, CUBLAS_GEMM_ALGO3, CUBLAS_GEMM_ALGO4, @@ -2245,7 +2234,6 @@ bool CUDABlas::GetBlasGemmAlgorithms( }) { out_algorithms->push_back(algo); } -#endif return true; } diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc index e7e4192dfc..273ed83997 100644 --- a/tensorflow/stream_executor/cuda/cuda_driver.cc +++ b/tensorflow/stream_executor/cuda/cuda_driver.cc @@ -26,16 +26,16 @@ limitations under the License. #include "tensorflow/stream_executor/lib/env.h" #include "tensorflow/stream_executor/lib/error.h" #include "tensorflow/stream_executor/lib/human_readable.h" +#include "tensorflow/stream_executor/lib/inlined_vector.h" #include "tensorflow/stream_executor/lib/notification.h" -#include "tensorflow/stream_executor/lib/threadpool.h" #include "tensorflow/stream_executor/lib/stacktrace.h" #include "tensorflow/stream_executor/lib/static_threadlocal.h" #include "tensorflow/stream_executor/lib/strcat.h" #include "tensorflow/stream_executor/lib/stringprintf.h" +#include "tensorflow/stream_executor/lib/threadpool.h" #include "tensorflow/stream_executor/platform/logging.h" #include "tensorflow/stream_executor/platform/mutex.h" #include "tensorflow/stream_executor/platform/port.h" -#include "tensorflow/stream_executor/lib/inlined_vector.h" bool FLAGS_gpuexec_cuda_driver_inject_init_error = false; bool FLAGS_gpuexec_cuda_sync_around_driver_calls = false; @@ -204,11 +204,11 @@ string ToString(CUresult result) { case 719: return "CUDA_ERROR_LAUNCH_FAILED"; - OSTREAM_CUDA_ERROR(CONTEXT_ALREADY_IN_USE) - OSTREAM_CUDA_ERROR(PEER_ACCESS_UNSUPPORTED) - OSTREAM_CUDA_ERROR(NOT_PERMITTED) - OSTREAM_CUDA_ERROR(NOT_SUPPORTED) - OSTREAM_CUDA_ERROR(UNKNOWN) // Unknown internal error to CUDA. 
+ OSTREAM_CUDA_ERROR(CONTEXT_ALREADY_IN_USE) + OSTREAM_CUDA_ERROR(PEER_ACCESS_UNSUPPORTED) + OSTREAM_CUDA_ERROR(NOT_PERMITTED) + OSTREAM_CUDA_ERROR(NOT_SUPPORTED) + OSTREAM_CUDA_ERROR(UNKNOWN) // Unknown internal error to CUDA. default: return port::StrCat("CUresult(", static_cast<int>(result), ")"); } @@ -470,7 +470,8 @@ bool DeviceOptionsToContextFlags(const DeviceOptions &device_options, } /* static */ port::Status CUDADriver::CreateContext( - CUdevice device, DeviceOptions device_options, CudaContext** context) { + CUdevice device, const DeviceOptions &device_options, + CudaContext **context) { *context = nullptr; int flags = 0; @@ -481,62 +482,45 @@ bool DeviceOptionsToContextFlags(const DeviceOptions &device_options, CUresult res; CUcontext former_context; CUcontext new_context; - { - // TODO(leary) Need to see if NVIDIA can expunge the leakiness in their - // context creation: see http://b/13248943 -#if CUDA_VERSION >= 7000 - { - unsigned int former_primary_context_flags; - int former_primary_context_is_active; - CHECK_EQ(CUDA_SUCCESS, - cuDevicePrimaryCtxGetState(device, &former_primary_context_flags, - &former_primary_context_is_active)); - if (former_primary_context_flags != flags) { - if (former_primary_context_is_active) { - LOG(ERROR) - << "The primary context is active and has a different flag set (" - << former_primary_context_flags << ") than the desired flag set (" - << flags << ")."; - } else { - CHECK_EQ(CUDA_SUCCESS, cuDevicePrimaryCtxSetFlags(device, flags)); - } - } + unsigned int former_primary_context_flags; + int former_primary_context_is_active; + CHECK_EQ(CUDA_SUCCESS, + cuDevicePrimaryCtxGetState(device, &former_primary_context_flags, + &former_primary_context_is_active)); + if (former_primary_context_flags != flags) { + if (former_primary_context_is_active) { + LOG(ERROR) + << "The primary context is active and has a different flag set (" + << former_primary_context_flags << ") than the desired flag set (" + << flags << ")."; + } else { 
+ CHECK_EQ(CUDA_SUCCESS, cuDevicePrimaryCtxSetFlags(device, flags)); } + } - former_context = CUDADriver::CurrentContextOrDie(); - res = cuDevicePrimaryCtxRetain(&new_context, device); - if (former_context != nullptr) { - CUdevice former_device; - if (cuCtxGetDevice(&former_device) == CUDA_SUCCESS) { - if (former_device == device) { - if (former_context == new_context) { - VLOG(2) << "The primary context " << former_context - << " for device " << device - << " exists before initializing the StreamExecutor."; - } else { - LOG(WARNING) - << "A non-primary context " << former_context << " for device " - << device - << " exists before initializing the StreamExecutor. The " - << "primary context is now " << new_context << ". We " - << "haven't verified StreamExecutor works with that."; - } + former_context = CUDADriver::CurrentContextOrDie(); + res = cuDevicePrimaryCtxRetain(&new_context, device); + if (former_context != nullptr) { + CUdevice former_device; + if (cuCtxGetDevice(&former_device) == CUDA_SUCCESS) { + if (former_device == device) { + if (former_context == new_context) { + VLOG(2) << "The primary context " << former_context << " for device " + << device + << " exists before initializing the StreamExecutor."; + } else { + LOG(WARNING) << "A non-primary context " << former_context + << " for device " << device + << " exists before initializing the StreamExecutor. The " + << "primary context is now " << new_context << ". 
We " + << "haven't verified StreamExecutor works with that."; } - } else { - LOG(ERROR) << "Failed to get the device of the current context " - << former_context; } + } else { + LOG(ERROR) << "Failed to get the device of the current context " + << former_context; } -#else - former_context = CurrentContext(); - if (former_context != nullptr) { - LOG(WARNING) - << "creating context when one is currently active; existing: " - << former_context; - } - res = cuCtxCreate(&new_context, flags, device); -#endif } CHECK_EQ(CUDA_SUCCESS, cuCtxSetCurrent(former_context)); @@ -548,11 +532,7 @@ bool DeviceOptionsToContextFlags(const DeviceOptions &device_options, return port::Status::OK(); } -#if CUDA_VERSION >= 7000 string message = "failed call to cuDevicePrimaryCtxRetain: " + ToString(res); -#else - string message = "failed call to cuCtxCreate: " + ToString(res); -#endif if (res == CUDA_ERROR_OUT_OF_MEMORY) { uint64 total_memory; if (GetDeviceTotalMemory(device, &total_memory)) { @@ -569,7 +549,6 @@ bool DeviceOptionsToContextFlags(const DeviceOptions &device_options, if (context == nullptr) { return; } -#if CUDA_VERSION >= 7000 CUcontext former_context = CurrentContext(); CUresult res = cuCtxSetCurrent(context->context()); CUdevice device; @@ -577,9 +556,6 @@ bool DeviceOptionsToContextFlags(const DeviceOptions &device_options, cuCtxSetCurrent(former_context); res = cuDevicePrimaryCtxRelease(device); -#else - CUresult res = cuCtxDestroy(context->context()); -#endif if (res != CUDA_SUCCESS) { LOG(ERROR) << "failed to release CUDA context; leaking: " << ToString(res); diff --git a/tensorflow/stream_executor/cuda/cuda_driver.h b/tensorflow/stream_executor/cuda/cuda_driver.h index a9969e247e..b952cfaf68 100644 --- a/tensorflow/stream_executor/cuda/cuda_driver.h +++ b/tensorflow/stream_executor/cuda/cuda_driver.h @@ -147,7 +147,7 @@ class CUDADriver { // userspace processes is given here: // 
http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g65dc0012348bc84810e2103a40d8e2cf static port::Status CreateContext(CUdevice device, - DeviceOptions device_options, + const DeviceOptions& device_options, CudaContext** context); // Destroys the provided context via cuCtxDestroy. |