author Smit Hinsu <hinsu@google.com> 2018-05-21 17:42:15 -0700
committer TensorFlower Gardener <gardener@tensorflow.org> 2018-05-21 17:44:41 -0700
commit b1139814f91c5216eb5ff229ee7e1982e5f4e888 (patch)
tree 7f85c8229bfd47eeba49890aa75b59c8680e619c /tensorflow/core/protobuf
parent d913a243196fa07d4728c8f7c1ce6444ecd086eb (diff)
Introduce an option to allocate CUDA unified memory
PiperOrigin-RevId: 197490523
Diffstat (limited to 'tensorflow/core/protobuf')
-rw-r--r--  tensorflow/core/protobuf/config.proto | 44
1 file changed, 33 insertions(+), 11 deletions(-)
diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto
index c1a0075b64..6cd067afcb 100644
--- a/tensorflow/core/protobuf/config.proto
+++ b/tensorflow/core/protobuf/config.proto
@@ -14,12 +14,29 @@ import "tensorflow/core/protobuf/cluster.proto";
import "tensorflow/core/protobuf/rewriter_config.proto";
message GPUOptions {
- // A value between 0 and 1 that indicates what fraction of the
- // available GPU memory to pre-allocate for each process. 1 means
- // to pre-allocate all of the GPU memory, 0.5 means the process
- // allocates ~50% of the available GPU memory.
+ // Fraction of the available GPU memory to allocate for each process.
+ // 1 means to allocate all of the GPU memory; 0.5 means the process
+ // allocates up to ~50% of the available GPU memory.
+ //
+ // GPU memory is pre-allocated unless the allow_growth option is enabled.
+ //
+ // If greater than 1.0, uses CUDA unified memory to potentially oversubscribe
+ // the amount of memory available on the GPU device by using host memory as a
+ // swap space. Accessing memory not available on the device will be
+ // significantly slower as that would require memory transfer between the host
+ // and the device. Consider options to reduce the memory requirement
+ // before enabling this feature, as it may come with a negative
+ // performance impact. Oversubscription via unified memory requires
+ // Pascal-class or newer GPUs and is currently supported only on the
+ // Linux operating system. See
+ // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#um-requirements
+ // for the detailed requirements.
double per_process_gpu_memory_fraction = 1;
+ // If true, the allocator does not pre-allocate the entire specified
+ // GPU memory region, instead starting small and growing as needed.
+ bool allow_growth = 4;
+
// The type of GPU allocation strategy to use.
//
// Allowed values:
@@ -35,10 +52,6 @@ message GPUOptions {
// a reasonable default (several MBs).
int64 deferred_deletion_bytes = 3;
- // If true, the allocator does not pre-allocate the entire specified
- // GPU memory region, instead starting small and growing as needed.
- bool allow_growth = 4;
-
// A comma-separated list of GPU ids that determines the 'visible'
// to 'virtual' mapping of GPU devices. For example, if TensorFlow
// can see 8 GPU devices in the process, and one wanted to map
@@ -82,9 +95,6 @@ message GPUOptions {
// the overall host system performance.
bool force_gpu_compatible = 8;
- // Everything inside Experimental is subject to change and is not subject
- // to API stability guarantees in
- // https://www.tensorflow.org/programmers_guide/version_compat.
message Experimental {
// Configuration for breaking down a visible GPU into multiple "virtual"
// devices.
@@ -124,8 +134,20 @@ message GPUOptions {
// different settings in different sessions within same process will
// result in undefined behavior.
repeated VirtualDevices virtual_devices = 1;
+
+ // If true, uses CUDA unified memory for memory allocations. If the
+ // per_process_gpu_memory_fraction option is greater than 1.0, unified
+ // memory is used regardless of the value of this field. See the comments
+ // on the per_process_gpu_memory_fraction field for more details on
+ // unified memory and its requirements. This option is useful for
+ // oversubscribing memory when multiple processes share a single GPU,
+ // each using less than a 1.0 per-process memory fraction.
+ bool use_unified_memory = 2;
}
+ // Everything inside experimental is subject to change and is not covered
+ // by the API stability guarantees in
+ // https://www.tensorflow.org/programmers_guide/version_compat.
Experimental experimental = 9;
};
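
For reference, a minimal sketch of how these options could be set from the Python client of that era (the TF 1.x ConfigProto API). The fraction value, the trivial session body, and the assumption that the build and hardware support unified memory (Pascal-class or newer GPU, Linux) are illustrative, not part of this commit.

import tensorflow as tf

config = tf.ConfigProto()

# A fraction above 1.0 switches the allocator to CUDA unified memory and
# oversubscribes device memory, with host RAM acting as swap space.
config.gpu_options.per_process_gpu_memory_fraction = 2.0  # illustrative value

# Alternative: keep each process at or below 1.0 and opt in to unified
# memory explicitly via the new experimental field, e.g. when several
# processes share one GPU.
# config.gpu_options.experimental.use_unified_memory = True

# Unrelated to unified memory: grow allocations on demand instead of
# pre-allocating the whole region up front.
# config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:
    print(sess.run(tf.constant(1.0)))  # trivial op just to apply the config

Note that, per the comment added above, a fraction greater than 1.0 implies unified memory regardless of the use_unified_memory setting, so the two knobs are alternatives rather than a required pair.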