diff options
Diffstat (limited to 'tensorflow/stream_executor')
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_dnn.cc | 7 | ||||
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_dnn.h | 2 | ||||
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_driver.cc | 14 | ||||
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 2 | ||||
-rw-r--r-- | tensorflow/stream_executor/dnn.h | 20 | ||||
-rw-r--r-- | tensorflow/stream_executor/platform/port.h | 6 |
6 files changed, 27 insertions, 24 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 640f270323..102419a264 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -524,11 +524,12 @@ port::Status CudnnSupport::Init() { ToString(status))}; } -port::StatusOr<std::tuple<int, int, int>> CudnnSupport::GetVersion() { +port::StatusOr<perftools::gputools::dnn::VersionInfo> +CudnnSupport::GetVersion() { CudnnVersion version; TF_RETURN_IF_ERROR(GetLoadedCudnnVersion(&version)); - return std::make_tuple(version.major_version, version.minor_version, - version.patch_level); + return perftools::gputools::dnn::VersionInfo( + version.major_version, version.minor_version, version.patch_level); } // Turns a BatchDescriptor structure into a cudnn tensor handle within a scope. diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h index e6d12bfef9..5ded7cf154 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.h +++ b/tensorflow/stream_executor/cuda/cuda_dnn.h @@ -45,7 +45,7 @@ class CudnnSupport : public dnn::DnnSupport { ~CudnnSupport() override; port::Status Init() override; - port::StatusOr<std::tuple<int, int, int>> GetVersion() override; + port::StatusOr<perftools::gputools::dnn::VersionInfo> GetVersion() override; port::StatusOr<std::unique_ptr<dnn::RnnDescriptor>> createRnnDescriptor( int num_layers, int hidden_size, int input_size, diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc index fedf4f53b8..71cab145b9 100644 --- a/tensorflow/stream_executor/cuda/cuda_driver.cc +++ b/tensorflow/stream_executor/cuda/cuda_driver.cc @@ -37,14 +37,6 @@ limitations under the License. #include "tensorflow/stream_executor/platform/port.h" #include "tensorflow/stream_executor/lib/inlined_vector.h" -#if defined(PLATFORM_WINDOWS) -// TODO: in windows ARRAYSIZE is defined in winnt.h but including it -// here creates a conflict with cuda.h - for now define it here. -#define ARRAYSIZE(a) \ - ((sizeof(a) / sizeof(*(a))) / \ - static_cast<size_t>(!(sizeof(a) % sizeof(*(a))))) -#endif - bool FLAGS_gpuexec_cuda_driver_inject_init_error = false; bool FLAGS_gpuexec_cuda_sync_around_driver_calls = false; bool FLAGS_gpuexec_cuda_device_0_only = false; @@ -719,15 +711,15 @@ CUDADriver::ContextGetSharedMemConfig(CudaContext* context) { port::bit_cast<void *>(uintptr_t(info_log_buffer_bytes)), port::bit_cast<void *>(info_log_buffer.data()), port::bit_cast<void *>(uintptr_t(log_verbose))}; - CHECK(ARRAYSIZE(options) == ARRAYSIZE(option_values)); + CHECK(TF_ARRAYSIZE(options) == TF_ARRAYSIZE(option_values)); CUresult res; { // TODO(leary) Need to see if NVIDIA can expunge the leakiness in their // module loading: see http://b/13248943 - res = cuModuleLoadDataEx(module, ptx_data, ARRAYSIZE(options), options, - option_values); + res = cuModuleLoadDataEx(module, ptx_data, TF_ARRAYSIZE(options), + options, option_values); } // The PTX JIT mutates the values in the option values array to reflect the diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc index 9700daca89..7c87d33d21 100644 --- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc +++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc @@ -1126,7 +1126,7 @@ DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const { builder.set_name(device_name); } - for (size_t i = 0; i < ARRAYSIZE(kAllUnqueryableDeviceParams); i++) { + for (size_t i = 0; i < TF_ARRAYSIZE(kAllUnqueryableDeviceParams); i++) { const auto ¶ms = kAllUnqueryableDeviceParams[i]; if (params.cc_major == cc_major_ && params.cc_minor == cc_minor_) { builder.set_blocks_per_core_limit(params.blocks_per_core_limit); diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index 8e202d115a..39f21d8b10 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -875,6 +875,22 @@ enum class ElementwiseOperation { kAdd, kMultiply }; string ElementwiseOperationString(ElementwiseOperation op); +// A simple class representing the version of the backing library, to +// workaround the "too perfect forwarding" issue in gcc6+ compilers. +// See PR#16309 and issue #18402 for links discussing the issue. +class VersionInfo { + public: + VersionInfo(int major = 0, int minor = 0, int patch = 0) + : major_(major), minor_(minor), patch_(patch) {} + int major_version() { return major_; } + int minor_version() { return minor_; } + int patch() { return patch_; } + private: + int major_; + int minor_; + int patch_; +}; + // Suite of operations typically used for implementing Deep/Convolutional Neural // Nets. Note: A false return value of an operation indicates the // implementation is not available. @@ -885,8 +901,8 @@ class DnnSupport { virtual port::Status Init() = 0; - // Gets the version of the backing library, as a {major, minor, patch} tuple. - virtual port::StatusOr<std::tuple<int, int, int>> GetVersion() { + // Gets the version of the backing library, as a VersionInfo object. + virtual port::StatusOr<VersionInfo> GetVersion() { return port::UnimplementedError( "DnnSupport::GetVersion not implemented on this platform."); } diff --git a/tensorflow/stream_executor/platform/port.h b/tensorflow/stream_executor/platform/port.h index 259cf380d6..57ad965ef1 100644 --- a/tensorflow/stream_executor/platform/port.h +++ b/tensorflow/stream_executor/platform/port.h @@ -38,12 +38,6 @@ using tensorflow::uint64; using std::string; #endif -#if !defined(COMPILER_MSVC) -#define ARRAYSIZE(a) \ - ((sizeof(a) / sizeof(*(a))) / \ - static_cast<size_t>(!(sizeof(a) % sizeof(*(a))))) -#endif - using tensorflow::LinkerInitialized; using tensorflow::LINKER_INITIALIZED; |