diff options
Diffstat (limited to 'tensorflow/stream_executor/cuda')
-rw-r--r--  tensorflow/stream_executor/cuda/cuda_dnn.cc          |  7
-rw-r--r--  tensorflow/stream_executor/cuda/cuda_dnn.h           |  2
-rw-r--r--  tensorflow/stream_executor/cuda/cuda_driver.cc       | 14
-rw-r--r--  tensorflow/stream_executor/cuda/cuda_gpu_executor.cc |  2
4 files changed, 9 insertions, 16 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 640f270323..102419a264 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -524,11 +524,12 @@ port::Status CudnnSupport::Init() { ToString(status))}; } -port::StatusOr<std::tuple<int, int, int>> CudnnSupport::GetVersion() { +port::StatusOr<perftools::gputools::dnn::VersionInfo> +CudnnSupport::GetVersion() { CudnnVersion version; TF_RETURN_IF_ERROR(GetLoadedCudnnVersion(&version)); - return std::make_tuple(version.major_version, version.minor_version, - version.patch_level); + return perftools::gputools::dnn::VersionInfo( + version.major_version, version.minor_version, version.patch_level); } // Turns a BatchDescriptor structure into a cudnn tensor handle within a scope. diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h index e6d12bfef9..5ded7cf154 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.h +++ b/tensorflow/stream_executor/cuda/cuda_dnn.h @@ -45,7 +45,7 @@ class CudnnSupport : public dnn::DnnSupport { ~CudnnSupport() override; port::Status Init() override; - port::StatusOr<std::tuple<int, int, int>> GetVersion() override; + port::StatusOr<perftools::gputools::dnn::VersionInfo> GetVersion() override; port::StatusOr<std::unique_ptr<dnn::RnnDescriptor>> createRnnDescriptor( int num_layers, int hidden_size, int input_size, diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc index fedf4f53b8..71cab145b9 100644 --- a/tensorflow/stream_executor/cuda/cuda_driver.cc +++ b/tensorflow/stream_executor/cuda/cuda_driver.cc @@ -37,14 +37,6 @@ limitations under the License. 
#include "tensorflow/stream_executor/platform/port.h" #include "tensorflow/stream_executor/lib/inlined_vector.h" -#if defined(PLATFORM_WINDOWS) -// TODO: in windows ARRAYSIZE is defined in winnt.h but including it -// here creates a conflict with cuda.h - for now define it here. -#define ARRAYSIZE(a) \ - ((sizeof(a) / sizeof(*(a))) / \ - static_cast<size_t>(!(sizeof(a) % sizeof(*(a))))) -#endif - bool FLAGS_gpuexec_cuda_driver_inject_init_error = false; bool FLAGS_gpuexec_cuda_sync_around_driver_calls = false; bool FLAGS_gpuexec_cuda_device_0_only = false; @@ -719,15 +711,15 @@ CUDADriver::ContextGetSharedMemConfig(CudaContext* context) { port::bit_cast<void *>(uintptr_t(info_log_buffer_bytes)), port::bit_cast<void *>(info_log_buffer.data()), port::bit_cast<void *>(uintptr_t(log_verbose))}; - CHECK(ARRAYSIZE(options) == ARRAYSIZE(option_values)); + CHECK(TF_ARRAYSIZE(options) == TF_ARRAYSIZE(option_values)); CUresult res; { // TODO(leary) Need to see if NVIDIA can expunge the leakiness in their // module loading: see http://b/13248943 - res = cuModuleLoadDataEx(module, ptx_data, ARRAYSIZE(options), options, - option_values); + res = cuModuleLoadDataEx(module, ptx_data, TF_ARRAYSIZE(options), + options, option_values); } // The PTX JIT mutates the values in the option values array to reflect the diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index 9700daca89..7c87d33d21 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -1126,7 +1126,7 @@ DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const { builder.set_name(device_name); } - for (size_t i = 0; i < ARRAYSIZE(kAllUnqueryableDeviceParams); i++) { + for (size_t i = 0; i < TF_ARRAYSIZE(kAllUnqueryableDeviceParams); i++) { const auto ¶ms = kAllUnqueryableDeviceParams[i]; if (params.cc_major == cc_major_ && params.cc_minor == cc_minor_) { 
builder.set_blocks_per_core_limit(params.blocks_per_core_limit);