about summary refs log tree commit diff homepage
path: root/tensorflow/stream_executor/cuda
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2018-05-18 06:31:20 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-05-18 06:33:41 -0700
commit2934484b3a4802c3b4644e6fc9a2b1c647d2eb9a (patch)
treed62a6167ea2bec029663cc06bda8dbebc2938f39 /tensorflow/stream_executor/cuda
parent68546a6cfd18ac1a16f6d6a1843882aea4243f55 (diff)
Dropping support for CUDA < 8.
PiperOrigin-RevId: 197137612
Diffstat (limited to 'tensorflow/stream_executor/cuda')
-rw-r--r--tensorflow/stream_executor/cuda/cuda_blas.cc14
-rw-r--r--tensorflow/stream_executor/cuda/cuda_driver.cc108
-rw-r--r--tensorflow/stream_executor/cuda/cuda_driver.h2
3 files changed, 44 insertions, 80 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_blas.cc b/tensorflow/stream_executor/cuda/cuda_blas.cc
index dcc3f7ac98..3e9a23c658 100644
--- a/tensorflow/stream_executor/cuda/cuda_blas.cc
+++ b/tensorflow/stream_executor/cuda/cuda_blas.cc
@@ -16,11 +16,7 @@ limitations under the License.
#include "cuda/include/cublas_v2.h"
#include "cuda/include/cuda.h"
-#if CUDA_VERSION >= 8000
#define SE_CUDA_DATA_HALF CUDA_R_16F
-#else
-#define SE_CUDA_DATA_HALF CUBLAS_DATA_HALF
-#endif
#include "tensorflow/stream_executor/cuda/cuda_blas.h"
@@ -45,10 +41,8 @@ limitations under the License.
// approach when the issue is fixed.
#if CUDA_VERSION < 9000
#include "cuda/include/cuda_fp16.h"
-#if CUDA_VERSION >= 7050
#define EIGEN_HAS_CUDA_FP16
#endif
-#endif
#include "third_party/eigen3/Eigen/Core"
@@ -543,9 +537,7 @@ cublasSideMode_t CUDABlasSide(blas::Side side) {
// blas::ComputationType to a cudaDataType_t.
//
// These are used to build the argument type and computation type args to
-// cublasGemmEx. cublasGemmEx and cudaDataType_t are available only on
-// CUDA >= 8.0.
-#if CUDA_VERSION >= 8000
+// cublasGemmEx.
template <typename T>
struct CUDADataType;
@@ -620,8 +612,6 @@ cudaDataType_t CUDAComputationType(blas::ComputationType ty) {
return CUDA_C_64F;
}
}
-#endif
-
} // namespace
template <typename FuncT, typename... Args>
@@ -2229,7 +2219,6 @@ bool CUDABlas::GetBlasGemmAlgorithms(
// Note that when CUDA version and compute capability is not sufficient, we
// still return the out_algorithms. Caller needs to make sure that in this case,
// the returned vector is empty.
-#if CUDA_VERSION >= 8000
for (cublasGemmAlgo_t algo : {
CUBLAS_GEMM_DFALT, CUBLAS_GEMM_ALGO0, CUBLAS_GEMM_ALGO1,
CUBLAS_GEMM_ALGO2, CUBLAS_GEMM_ALGO3, CUBLAS_GEMM_ALGO4,
@@ -2245,7 +2234,6 @@ bool CUDABlas::GetBlasGemmAlgorithms(
}) {
out_algorithms->push_back(algo);
}
-#endif
return true;
}
diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc
index e7e4192dfc..273ed83997 100644
--- a/tensorflow/stream_executor/cuda/cuda_driver.cc
+++ b/tensorflow/stream_executor/cuda/cuda_driver.cc
@@ -26,16 +26,16 @@ limitations under the License.
#include "tensorflow/stream_executor/lib/env.h"
#include "tensorflow/stream_executor/lib/error.h"
#include "tensorflow/stream_executor/lib/human_readable.h"
+#include "tensorflow/stream_executor/lib/inlined_vector.h"
#include "tensorflow/stream_executor/lib/notification.h"
-#include "tensorflow/stream_executor/lib/threadpool.h"
#include "tensorflow/stream_executor/lib/stacktrace.h"
#include "tensorflow/stream_executor/lib/static_threadlocal.h"
#include "tensorflow/stream_executor/lib/strcat.h"
#include "tensorflow/stream_executor/lib/stringprintf.h"
+#include "tensorflow/stream_executor/lib/threadpool.h"
#include "tensorflow/stream_executor/platform/logging.h"
#include "tensorflow/stream_executor/platform/mutex.h"
#include "tensorflow/stream_executor/platform/port.h"
-#include "tensorflow/stream_executor/lib/inlined_vector.h"
bool FLAGS_gpuexec_cuda_driver_inject_init_error = false;
bool FLAGS_gpuexec_cuda_sync_around_driver_calls = false;
@@ -204,11 +204,11 @@ string ToString(CUresult result) {
case 719:
return "CUDA_ERROR_LAUNCH_FAILED";
- OSTREAM_CUDA_ERROR(CONTEXT_ALREADY_IN_USE)
- OSTREAM_CUDA_ERROR(PEER_ACCESS_UNSUPPORTED)
- OSTREAM_CUDA_ERROR(NOT_PERMITTED)
- OSTREAM_CUDA_ERROR(NOT_SUPPORTED)
- OSTREAM_CUDA_ERROR(UNKNOWN) // Unknown internal error to CUDA.
+ OSTREAM_CUDA_ERROR(CONTEXT_ALREADY_IN_USE)
+ OSTREAM_CUDA_ERROR(PEER_ACCESS_UNSUPPORTED)
+ OSTREAM_CUDA_ERROR(NOT_PERMITTED)
+ OSTREAM_CUDA_ERROR(NOT_SUPPORTED)
+ OSTREAM_CUDA_ERROR(UNKNOWN) // Unknown internal error to CUDA.
default:
return port::StrCat("CUresult(", static_cast<int>(result), ")");
}
@@ -470,7 +470,8 @@ bool DeviceOptionsToContextFlags(const DeviceOptions &device_options,
}
/* static */ port::Status CUDADriver::CreateContext(
- CUdevice device, DeviceOptions device_options, CudaContext** context) {
+ CUdevice device, const DeviceOptions &device_options,
+ CudaContext **context) {
*context = nullptr;
int flags = 0;
@@ -481,62 +482,45 @@ bool DeviceOptionsToContextFlags(const DeviceOptions &device_options,
CUresult res;
CUcontext former_context;
CUcontext new_context;
- {
- // TODO(leary) Need to see if NVIDIA can expunge the leakiness in their
- // context creation: see http://b/13248943
-#if CUDA_VERSION >= 7000
- {
- unsigned int former_primary_context_flags;
- int former_primary_context_is_active;
- CHECK_EQ(CUDA_SUCCESS,
- cuDevicePrimaryCtxGetState(device, &former_primary_context_flags,
- &former_primary_context_is_active));
- if (former_primary_context_flags != flags) {
- if (former_primary_context_is_active) {
- LOG(ERROR)
- << "The primary context is active and has a different flag set ("
- << former_primary_context_flags << ") than the desired flag set ("
- << flags << ").";
- } else {
- CHECK_EQ(CUDA_SUCCESS, cuDevicePrimaryCtxSetFlags(device, flags));
- }
- }
+ unsigned int former_primary_context_flags;
+ int former_primary_context_is_active;
+ CHECK_EQ(CUDA_SUCCESS,
+ cuDevicePrimaryCtxGetState(device, &former_primary_context_flags,
+ &former_primary_context_is_active));
+ if (former_primary_context_flags != flags) {
+ if (former_primary_context_is_active) {
+ LOG(ERROR)
+ << "The primary context is active and has a different flag set ("
+ << former_primary_context_flags << ") than the desired flag set ("
+ << flags << ").";
+ } else {
+ CHECK_EQ(CUDA_SUCCESS, cuDevicePrimaryCtxSetFlags(device, flags));
}
+ }
- former_context = CUDADriver::CurrentContextOrDie();
- res = cuDevicePrimaryCtxRetain(&new_context, device);
- if (former_context != nullptr) {
- CUdevice former_device;
- if (cuCtxGetDevice(&former_device) == CUDA_SUCCESS) {
- if (former_device == device) {
- if (former_context == new_context) {
- VLOG(2) << "The primary context " << former_context
- << " for device " << device
- << " exists before initializing the StreamExecutor.";
- } else {
- LOG(WARNING)
- << "A non-primary context " << former_context << " for device "
- << device
- << " exists before initializing the StreamExecutor. The "
- << "primary context is now " << new_context << ". We "
- << "haven't verified StreamExecutor works with that.";
- }
+ former_context = CUDADriver::CurrentContextOrDie();
+ res = cuDevicePrimaryCtxRetain(&new_context, device);
+ if (former_context != nullptr) {
+ CUdevice former_device;
+ if (cuCtxGetDevice(&former_device) == CUDA_SUCCESS) {
+ if (former_device == device) {
+ if (former_context == new_context) {
+ VLOG(2) << "The primary context " << former_context << " for device "
+ << device
+ << " exists before initializing the StreamExecutor.";
+ } else {
+ LOG(WARNING) << "A non-primary context " << former_context
+ << " for device " << device
+ << " exists before initializing the StreamExecutor. The "
+ << "primary context is now " << new_context << ". We "
+ << "haven't verified StreamExecutor works with that.";
}
- } else {
- LOG(ERROR) << "Failed to get the device of the current context "
- << former_context;
}
+ } else {
+ LOG(ERROR) << "Failed to get the device of the current context "
+ << former_context;
}
-#else
- former_context = CurrentContext();
- if (former_context != nullptr) {
- LOG(WARNING)
- << "creating context when one is currently active; existing: "
- << former_context;
- }
- res = cuCtxCreate(&new_context, flags, device);
-#endif
}
CHECK_EQ(CUDA_SUCCESS, cuCtxSetCurrent(former_context));
@@ -548,11 +532,7 @@ bool DeviceOptionsToContextFlags(const DeviceOptions &device_options,
return port::Status::OK();
}
-#if CUDA_VERSION >= 7000
string message = "failed call to cuDevicePrimaryCtxRetain: " + ToString(res);
-#else
- string message = "failed call to cuCtxCreate: " + ToString(res);
-#endif
if (res == CUDA_ERROR_OUT_OF_MEMORY) {
uint64 total_memory;
if (GetDeviceTotalMemory(device, &total_memory)) {
@@ -569,7 +549,6 @@ bool DeviceOptionsToContextFlags(const DeviceOptions &device_options,
if (context == nullptr) {
return;
}
-#if CUDA_VERSION >= 7000
CUcontext former_context = CurrentContext();
CUresult res = cuCtxSetCurrent(context->context());
CUdevice device;
@@ -577,9 +556,6 @@ bool DeviceOptionsToContextFlags(const DeviceOptions &device_options,
cuCtxSetCurrent(former_context);
res = cuDevicePrimaryCtxRelease(device);
-#else
- CUresult res = cuCtxDestroy(context->context());
-#endif
if (res != CUDA_SUCCESS) {
LOG(ERROR) << "failed to release CUDA context; leaking: " << ToString(res);
diff --git a/tensorflow/stream_executor/cuda/cuda_driver.h b/tensorflow/stream_executor/cuda/cuda_driver.h
index a9969e247e..b952cfaf68 100644
--- a/tensorflow/stream_executor/cuda/cuda_driver.h
+++ b/tensorflow/stream_executor/cuda/cuda_driver.h
@@ -147,7 +147,7 @@ class CUDADriver {
// userspace processes is given here:
// http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__CTX.html#group__CUDA__CTX_1g65dc0012348bc84810e2103a40d8e2cf
static port::Status CreateContext(CUdevice device,
- DeviceOptions device_options,
+ const DeviceOptions& device_options,
CudaContext** context);
// Destroys the provided context via cuCtxDestroy.