aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/stream_executor
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/stream_executor')
-rw-r--r-- tensorflow/stream_executor/cuda/cuda_dnn.cc | 7
-rw-r--r-- tensorflow/stream_executor/cuda/cuda_dnn.h | 2
-rw-r--r-- tensorflow/stream_executor/cuda/cuda_driver.cc | 14
-rw-r--r-- tensorflow/stream_executor/cuda/cuda_gpu_executor.cc | 2
-rw-r--r-- tensorflow/stream_executor/dnn.h | 20
-rw-r--r-- tensorflow/stream_executor/platform/port.h | 6
6 files changed, 27 insertions, 24 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 640f270323..102419a264 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -524,11 +524,12 @@ port::Status CudnnSupport::Init() {
ToString(status))};
}
-port::StatusOr<std::tuple<int, int, int>> CudnnSupport::GetVersion() {
+port::StatusOr<perftools::gputools::dnn::VersionInfo>
+CudnnSupport::GetVersion() {
CudnnVersion version;
TF_RETURN_IF_ERROR(GetLoadedCudnnVersion(&version));
- return std::make_tuple(version.major_version, version.minor_version,
- version.patch_level);
+ return perftools::gputools::dnn::VersionInfo(
+ version.major_version, version.minor_version, version.patch_level);
}
// Turns a BatchDescriptor structure into a cudnn tensor handle within a scope.
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h
index e6d12bfef9..5ded7cf154 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.h
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.h
@@ -45,7 +45,7 @@ class CudnnSupport : public dnn::DnnSupport {
~CudnnSupport() override;
port::Status Init() override;
- port::StatusOr<std::tuple<int, int, int>> GetVersion() override;
+ port::StatusOr<perftools::gputools::dnn::VersionInfo> GetVersion() override;
port::StatusOr<std::unique_ptr<dnn::RnnDescriptor>> createRnnDescriptor(
int num_layers, int hidden_size, int input_size,
diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc
index fedf4f53b8..71cab145b9 100644
--- a/tensorflow/stream_executor/cuda/cuda_driver.cc
+++ b/tensorflow/stream_executor/cuda/cuda_driver.cc
@@ -37,14 +37,6 @@ limitations under the License.
#include "tensorflow/stream_executor/platform/port.h"
#include "tensorflow/stream_executor/lib/inlined_vector.h"
-#if defined(PLATFORM_WINDOWS)
-// TODO: in windows ARRAYSIZE is defined in winnt.h but including it
-// here creates a conflict with cuda.h - for now define it here.
-#define ARRAYSIZE(a) \
- ((sizeof(a) / sizeof(*(a))) / \
- static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
-#endif
-
bool FLAGS_gpuexec_cuda_driver_inject_init_error = false;
bool FLAGS_gpuexec_cuda_sync_around_driver_calls = false;
bool FLAGS_gpuexec_cuda_device_0_only = false;
@@ -719,15 +711,15 @@ CUDADriver::ContextGetSharedMemConfig(CudaContext* context) {
port::bit_cast<void *>(uintptr_t(info_log_buffer_bytes)),
port::bit_cast<void *>(info_log_buffer.data()),
port::bit_cast<void *>(uintptr_t(log_verbose))};
- CHECK(ARRAYSIZE(options) == ARRAYSIZE(option_values));
+ CHECK(TF_ARRAYSIZE(options) == TF_ARRAYSIZE(option_values));
CUresult res;
{
// TODO(leary) Need to see if NVIDIA can expunge the leakiness in their
// module loading: see http://b/13248943
- res = cuModuleLoadDataEx(module, ptx_data, ARRAYSIZE(options), options,
- option_values);
+ res = cuModuleLoadDataEx(module, ptx_data, TF_ARRAYSIZE(options),
+ options, option_values);
}
// The PTX JIT mutates the values in the option values array to reflect the
diff --git a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
index 9700daca89..7c87d33d21 100644
--- a/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
+++ b/tensorflow/stream_executor/cuda/cuda_gpu_executor.cc
@@ -1126,7 +1126,7 @@ DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const {
builder.set_name(device_name);
}
- for (size_t i = 0; i < ARRAYSIZE(kAllUnqueryableDeviceParams); i++) {
+ for (size_t i = 0; i < TF_ARRAYSIZE(kAllUnqueryableDeviceParams); i++) {
const auto &params = kAllUnqueryableDeviceParams[i];
if (params.cc_major == cc_major_ && params.cc_minor == cc_minor_) {
builder.set_blocks_per_core_limit(params.blocks_per_core_limit);
diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h
index 8e202d115a..39f21d8b10 100644
--- a/tensorflow/stream_executor/dnn.h
+++ b/tensorflow/stream_executor/dnn.h
@@ -875,6 +875,22 @@ enum class ElementwiseOperation { kAdd, kMultiply };
string ElementwiseOperationString(ElementwiseOperation op);
+// A simple value class for the {major, minor, patch} version of the backing
+// library (each defaults to 0). Replaces std::tuple to work around the "too
+// perfect forwarding" issue in gcc6+; see PR#16309 and issue #18402.
+class VersionInfo {
+ public:
+  VersionInfo(int major = 0, int minor = 0, int patch = 0)
+      : major_(major), minor_(minor), patch_(patch) {}
+  int major_version() const { return major_; }
+  int minor_version() const { return minor_; }
+  int patch() const { return patch_; }
+ private:
+  int major_;
+  int minor_;
+  int patch_;
+};
+
// Suite of operations typically used for implementing Deep/Convolutional Neural
// Nets. Note: A false return value of an operation indicates the
// implementation is not available.
@@ -885,8 +901,8 @@ class DnnSupport {
virtual port::Status Init() = 0;
- // Gets the version of the backing library, as a {major, minor, patch} tuple.
- virtual port::StatusOr<std::tuple<int, int, int>> GetVersion() {
+ // Gets the version of the backing library, as a VersionInfo object.
+ virtual port::StatusOr<VersionInfo> GetVersion() {
return port::UnimplementedError(
"DnnSupport::GetVersion not implemented on this platform.");
}
diff --git a/tensorflow/stream_executor/platform/port.h b/tensorflow/stream_executor/platform/port.h
index 259cf380d6..57ad965ef1 100644
--- a/tensorflow/stream_executor/platform/port.h
+++ b/tensorflow/stream_executor/platform/port.h
@@ -38,12 +38,6 @@ using tensorflow::uint64;
using std::string;
#endif
-#if !defined(COMPILER_MSVC)
-#define ARRAYSIZE(a) \
- ((sizeof(a) / sizeof(*(a))) / \
- static_cast<size_t>(!(sizeof(a) % sizeof(*(a)))))
-#endif
-
using tensorflow::LinkerInitialized;
using tensorflow::LINKER_INITIALIZED;