author Smit Hinsu <hinsu@google.com> 2018-05-21 17:42:15 -0700
committer TensorFlower Gardener <gardener@tensorflow.org> 2018-05-21 17:44:41 -0700
commit b1139814f91c5216eb5ff229ee7e1982e5f4e888 (patch)
tree 7f85c8229bfd47eeba49890aa75b59c8680e619c /tensorflow/core/protobuf
parent d913a243196fa07d4728c8f7c1ce6444ecd086eb (diff)
Introduce an option to allocate CUDA unified memory
PiperOrigin-RevId: 197490523
Diffstat (limited to 'tensorflow/core/protobuf')
-rw-r--r--  tensorflow/core/protobuf/config.proto | 44
1 file changed, 33 insertions(+), 11 deletions(-)
diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto
index c1a0075b64..6cd067afcb 100644
--- a/tensorflow/core/protobuf/config.proto
+++ b/tensorflow/core/protobuf/config.proto
@@ -14,12 +14,29 @@ import "tensorflow/core/protobuf/cluster.proto";
import "tensorflow/core/protobuf/rewriter_config.proto";
message GPUOptions {
- // A value between 0 and 1 that indicates what fraction of the
- // available GPU memory to pre-allocate for each process. 1 means
- // to pre-allocate all of the GPU memory, 0.5 means the process
- // allocates ~50% of the available GPU memory.
+ // Fraction of the available GPU memory to allocate for each process.
+ // 1 means to allocate all of the GPU memory; 0.5 means the process
+ // allocates up to ~50% of the available GPU memory.
+ //
+ // GPU memory is pre-allocated unless the allow_growth option is enabled.
+ //
+ // If greater than 1.0, uses CUDA unified memory to potentially oversubscribe
+ // the amount of memory available on the GPU device by using host memory as a
+ // swap space. Accessing memory not available on the device will be
+ // significantly slower as that would require memory transfer between the host
+ // and the device. Consider options to reduce the memory requirement
+ // before enabling this feature, as it may come with a negative
+ // performance impact. Oversubscription via unified memory requires
+ // Pascal-class or newer GPUs and is currently supported only on the
+ // Linux operating system. See
+ // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#um-requirements
+ // for the detailed requirements.
double per_process_gpu_memory_fraction = 1;
+ // If true, the allocator does not pre-allocate the entire specified
+ // GPU memory region, instead starting small and growing as needed.
+ bool allow_growth = 4;
+
// The type of GPU allocation strategy to use.
//
// Allowed values:
@@ -35,10 +52,6 @@ message GPUOptions {
// a reasonable default (several MBs).
int64 deferred_deletion_bytes = 3;
- // If true, the allocator does not pre-allocate the entire specified
- // GPU memory region, instead starting small and growing as needed.
- bool allow_growth = 4;
-
// A comma-separated list of GPU ids that determines the 'visible'
// to 'virtual' mapping of GPU devices. For example, if TensorFlow
// can see 8 GPU devices in the process, and one wanted to map
@@ -82,9 +95,6 @@ message GPUOptions {
// the overall host system performance.
bool force_gpu_compatible = 8;
- // Everything inside Experimental is subject to change and is not subject
- // to API stability guarantees in
- // https://www.tensorflow.org/programmers_guide/version_compat.
message Experimental {
// Configuration for breaking down a visible GPU into multiple "virtual"
// devices.
@@ -124,8 +134,20 @@ message GPUOptions {
// different settings in different sessions within same process will
// result in undefined behavior.
repeated VirtualDevices virtual_devices = 1;
+
+ // If true, uses CUDA unified memory for memory allocations. If the
+ // per_process_gpu_memory_fraction option is greater than 1.0, unified
+ // memory is used regardless of the value of this field. See the comments
+ // on the per_process_gpu_memory_fraction field for more details on
+ // unified memory and its requirements. This option is useful for
+ // oversubscribing memory when multiple processes share a single GPU,
+ // each using less than a 1.0 per-process memory fraction.
+ bool use_unified_memory = 2;
}
+ // Everything inside experimental is subject to change and is not covered
+ // by the API stability guarantees in
+ // https://www.tensorflow.org/programmers_guide/version_compat.
Experimental experimental = 9;
};
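
For reference, a minimal sketch of how these options could be set from the Python client of that era (the TF 1.x ConfigProto API). The fraction value, the trivial session body, and the assumption that the build and hardware support unified memory (Pascal-class or newer GPU, Linux) are illustrative, not part of this commit.

import tensorflow as tf

config = tf.ConfigProto()

# A fraction above 1.0 switches the allocator to CUDA unified memory and
# oversubscribes device memory, with host RAM acting as swap space.
config.gpu_options.per_process_gpu_memory_fraction = 2.0  # illustrative value

# Alternative: keep each process at or below 1.0 and opt in to unified
# memory explicitly via the new experimental field, e.g. when several
# processes share one GPU.
# config.gpu_options.experimental.use_unified_memory = True

# Unrelated to unified memory: grow allocations on demand instead of
# pre-allocating the whole region up front.
# config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:
    print(sess.run(tf.constant(1.0)))  # trivial op just to apply the config

Note that, per the comment added above, a fraction greater than 1.0 implies unified memory regardless of the use_unified_memory setting, so the two knobs are alternatives rather than a required pair.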