diff options
author | 2017-04-13 00:52:08 -0800 | |
---|---|---|
committer | 2017-04-13 02:09:42 -0700 | |
commit | b4396632f78624057eefc79721e5081254068d48 (patch) | |
tree | 9452e44fecbc0c90e56bda5dbe137ec5ce290c8f | |
parent | cf1a098bc78897d04ec14c411ddf2a158fdb0851 (diff) |
Add a GPUOPTIONS option to force all tensors to be gpu_compatible
Change: 153039058
4 files changed, 29 insertions, 4 deletions
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h index 370b3cc4f6..2525931301 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.h +++ b/tensorflow/core/common_runtime/gpu/gpu_device.h @@ -108,6 +108,7 @@ class BaseGPUDevice : public LocalDevice { mutex trace_mu_; int gpu_id_ = -1; const bool sync_every_op_ = false; + bool force_gpu_compatible_ = false; const int32 max_streams_; std::unique_ptr<EventMgr> em_; diff --git a/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc b/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc index 94143a55d5..d9fa5a6b96 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_device_factory.cc @@ -31,12 +31,16 @@ class GPUDevice : public BaseGPUDevice { Allocator* cpu_allocator) : BaseGPUDevice(options, name, memory_limit, locality, gpu_id, physical_device_desc, gpu_allocator, cpu_allocator, - false /* sync every op */, 1 /* max_streams */) {} + false /* sync every op */, 1 /* max_streams */) { + if (options.config.has_gpu_options()) { + force_gpu_compatible_ = options.config.gpu_options().force_gpu_compatible(); + } + } Allocator* GetAllocator(AllocatorAttributes attr) override { if (attr.on_host()) { ProcessState* ps = ProcessState::singleton(); - if (attr.gpu_compatible()) { + if (attr.gpu_compatible() || force_gpu_compatible_) { return ps->GetCUDAHostAllocator(0); } else { return cpu_allocator_; @@ -71,12 +75,16 @@ class GPUCompatibleCPUDevice : public ThreadPoolDevice { GPUCompatibleCPUDevice(const SessionOptions& options, const string& name, Bytes memory_limit, const DeviceLocality& locality, Allocator* allocator) - : ThreadPoolDevice(options, name, memory_limit, locality, allocator) {} + : ThreadPoolDevice(options, name, memory_limit, locality, allocator) { + if (options.config.has_gpu_options()) { + force_gpu_compatible_ = options.config.gpu_options().force_gpu_compatible(); + } + } 
~GPUCompatibleCPUDevice() override {} Allocator* GetAllocator(AllocatorAttributes attr) override { ProcessState* ps = ProcessState::singleton(); - if (attr.gpu_compatible()) { + if (attr.gpu_compatible() || force_gpu_compatible_) { return ps->GetCUDAHostAllocator(0); } else { // Call the parent's implementation. diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index 98e7b171d2..5c0f7232eb 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -64,6 +64,18 @@ message GPUOptions { // PollEvents calls, when the queue is empty. If value is not // set or set to 0, gets set to a non-zero default. int32 polling_inactive_delay_msecs = 7; + + // Force all tensors to be gpu_compatible. On a GPU-enabled TensorFlow, + // enabling this option forces all CPU tensors to be allocated with Cuda + // pinned memory. Normally, TensorFlow will infer which tensors should be + // allocated as the pinned memory. But in case where the inference is + // incomplete, this option can significantly speed up the cross-device memory + // copy performance as long as it fits the memory. + // Note that this option is not something that should be + // enabled by default for unknown or very large models, since all Cuda pinned + // memory is unpageable, having too much pinned memory might negatively impact + // the overall host system performance. 
+ bool force_gpu_compatible = 8; }; // Options passed to the graph optimizer diff --git a/tensorflow/tools/api/golden/tensorflow.-g-p-u-options.pbtxt b/tensorflow/tools/api/golden/tensorflow.-g-p-u-options.pbtxt index 48cda623f7..30f7e4e116 100644 --- a/tensorflow/tools/api/golden/tensorflow.-g-p-u-options.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.-g-p-u-options.pbtxt @@ -23,6 +23,10 @@ tf_class { mtype: "<type \'getset_descriptor\'>" } member { + name: "FORCE_GPU_COMPATIBLE_FIELD_NUMBER" + mtype: "<type \'int\'>" + } + member { name: "PER_PROCESS_GPU_MEMORY_FRACTION_FIELD_NUMBER" mtype: "<type \'int\'>" } |