Diffstat (limited to 'tensorflow/stream_executor/cuda')
 tensorflow/stream_executor/cuda/cuda_dnn.cc          |  7 ++++---
 tensorflow/stream_executor/cuda/cuda_dnn.h           |  2 +-
 tensorflow/stream_executor/cuda/cuda_driver.cc       | 14 +++-----------
 tensorflow/stream_executor/cuda/cuda_gpu_executor.cc |  2 +-
 4 files changed, 9 insertions(+), 16 deletions(-)
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 640f270323..102419a264 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -524,11 +524,12 @@ port::Status CudnnSupport::Init() {
                       ToString(status))};
 }
 
-port::StatusOr<std::tuple<int, int, int>> CudnnSupport::GetVersion() {
+port::StatusOr<perftools::gputools::dnn::VersionInfo>
+CudnnSupport::GetVersion() {
   CudnnVersion version;
   TF_RETURN_IF_ERROR(GetLoadedCudnnVersion(&version));
-  return std::make_tuple(version.major_version, version.minor_version,
-                         version.patch_level);
+  return perftools::gputools::dnn::VersionInfo(
+      version.major_version, version.minor_version, version.patch_level);
 }
 
 // Turns a BatchDescriptor structure into a cudnn tensor handle within a scope.
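
Note: this change replaces GetVersion()'s anonymous std::tuple<int, int, int> result with the named dnn::VersionInfo type, so callers read self-describing accessors instead of positional std::get<N> indices. A minimal sketch of what such a type plausibly looks like (member and accessor names here are assumptions, not the verbatim dnn.h definition):

    // Hypothetical sketch of perftools::gputools::dnn::VersionInfo.
    class VersionInfo {
     public:
      VersionInfo(int major = 0, int minor = 0, int patch = 0)
          : major_(major), minor_(minor), patch_(patch) {}
      int major_version() const { return major_; }
      int minor_version() const { return minor_; }
      int patch() const { return patch_; }

     private:
      int major_;
      int minor_;
      int patch_;
    };
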
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h
index e6d12bfef9..5ded7cf154 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.h
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.h
@@ -45,7 +45,7 @@ class CudnnSupport : public dnn::DnnSupport {
   ~CudnnSupport() override;
 
   port::Status Init() override;
-  port::StatusOr<std::tuple<int, int, int>> GetVersion() override;
+  port::StatusOr<perftools::gputools::dnn::VersionInfo> GetVersion() override;
 
   port::StatusOr<std::unique_ptr<dnn::RnnDescriptor>> createRnnDescriptor(
       int num_layers, int hidden_size, int input_size,
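
With the declaration above, callers unwrap the port::StatusOr before reading the version. A hypothetical usage sketch, assuming StatusOr exposes ok() and ValueOrDie() as the stream_executor port library does elsewhere (dnn_support is a placeholder pointer):

    auto version_or = dnn_support->GetVersion();
    if (version_or.ok()) {
      perftools::gputools::dnn::VersionInfo v = version_or.ValueOrDie();
      VLOG(1) << "loaded cuDNN " << v.major_version() << "."
              << v.minor_version() << "." << v.patch();
    }
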
diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc
index fedf4f53b8..71cab145b9 100644
--- a/tensorflow/stream_executor/cuda/cuda_driver.cc
+++ b/tensorflow/stream_executor/cuda/cuda_driver.cc
@@ -37,14 +37,6 @@ limitations under the License.
 #include "tensorflow/stream_executor/platform/port.h"
 #include "tensorflow/stream_executor/lib/inlined_vector.h"
 
-#if defined(PLATFORM_WINDOWS)
-// TODO: in windows ARRAYSIZE is defined in winnt.h but including it
-// here creates a conflict with cuda.h - for now define it here.
-#define ARRAYSIZE(a) \
-  ((sizeof(a) / sizeof(*(a))) / \
-   static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
-#endif
-
 bool FLAGS_gpuexec_cuda_driver_inject_init_error = false;
 bool FLAGS_gpuexec_cuda_sync_around_driver_calls = false;
 bool FLAGS_gpuexec_cuda_device_0_only = false;
@@ -719,15 +711,15 @@ CUDADriver::ContextGetSharedMemConfig(CudaContext* context) {
         port::bit_cast<void *>(uintptr_t(info_log_buffer_bytes)),
         port::bit_cast<void *>(info_log_buffer.data()),
         port::bit_cast<void *>(uintptr_t(log_verbose))};
-    CHECK(ARRAYSIZE(options) == ARRAYSIZE(option_values));
+    CHECK(TF_ARRAYSIZE(options) == TF_ARRAYSIZE(option_values));
 
     CUresult res;
     {
       // TODO(leary) Need to see if NVIDIA can expunge the leakiness in their
       // module loading: see http://b/13248943
-      res = cuModuleLoadDataEx(module, ptx_data, ARRAYSIZE(options), options,
-                               option_values);
+      res = cuModuleLoadDataEx(module, ptx_data, TF_ARRAYSIZE(options),
+                               options, option_values);
     }
 
     // The PTX JIT mutates the values in the option values array to reflect the
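
The deleted block above existed only because winnt.h also defines ARRAYSIZE, which collided with this file's local definition on Windows; renaming the call sites to TensorFlow's existing TF_ARRAYSIZE macro (tensorflow/core/platform/macros.h) removes the need for the workaround. The idiom computes an array's element count at compile time, and its divisor turns some pointer-instead-of-array mistakes into a divide-by-zero compile error. A self-contained sketch of the same trick under an illustrative name:

    #include <cstddef>

    // Element count of a true array. If `a` is a pointer whose size is not a
    // multiple of the pointee size, the divisor is 0 and compilation fails.
    #define ARRAY_ELEMENT_COUNT(a) \
      ((sizeof(a) / sizeof(*(a))) / \
       static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))

    static const int kJitOptions[] = {1, 2, 3, 4};
    static_assert(ARRAY_ELEMENT_COUNT(kJitOptions) == 4,
                  "expected four JIT options");
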
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index 9700daca89..7c87d33d21 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -1126,7 +1126,7 @@ DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const {
     builder.set_name(device_name);
   }
 
-  for (size_t i = 0; i < ARRAYSIZE(kAllUnqueryableDeviceParams); i++) {
+  for (size_t i = 0; i < TF_ARRAYSIZE(kAllUnqueryableDeviceParams); i++) {
     const auto &params = kAllUnqueryableDeviceParams[i];
     if (params.cc_major == cc_major_ && params.cc_minor == cc_minor_) {
       builder.set_blocks_per_core_limit(params.blocks_per_core_limit);
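
For context, kAllUnqueryableDeviceParams is a static table of per-compute-capability limits that cannot be queried from the driver; the loop scans it for an entry matching the executor's cc_major_/cc_minor_. A reduced, hypothetical rendering of the pattern (field names beyond those visible in the diff are assumptions, and the values are illustrative):

    #include <cstdint>

    struct UnqueryableDeviceParams {
      int cc_major;
      int cc_minor;
      uint64_t blocks_per_core_limit;
    };

    static const UnqueryableDeviceParams kAllUnqueryableDeviceParams[] = {
        {3, 5, 16},
        {5, 2, 32},
    };

    // Range-for avoids the index/ARRAYSIZE bookkeeping entirely.
    uint64_t BlocksPerCoreLimitFor(int cc_major, int cc_minor) {
      for (const auto &params : kAllUnqueryableDeviceParams) {
        if (params.cc_major == cc_major && params.cc_minor == cc_minor) {
          return params.blocks_per_core_limit;
        }
      }
      return 0;  // no entry for this compute capability
    }
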