diff options
author | 2018-05-21 17:42:15 -0700 | |
---|---|---|
committer | 2018-05-21 17:44:41 -0700 | |
commit | b1139814f91c5216eb5ff229ee7e1982e5f4e888 (patch) | |
tree | 7f85c8229bfd47eeba49890aa75b59c8680e619c /tensorflow/core/protobuf | |
parent | d913a243196fa07d4728c8f7c1ce6444ecd086eb (diff) |
Introduce an option to allocate CUDA unified memory
PiperOrigin-RevId: 197490523
Diffstat (limited to 'tensorflow/core/protobuf')
-rw-r--r-- | tensorflow/core/protobuf/config.proto | 44 |
1 file changed, 33 insertions, 11 deletions
diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index c1a0075b64..6cd067afcb 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -14,12 +14,29 @@ import "tensorflow/core/protobuf/cluster.proto"; import "tensorflow/core/protobuf/rewriter_config.proto"; message GPUOptions { - // A value between 0 and 1 that indicates what fraction of the - // available GPU memory to pre-allocate for each process. 1 means - // to pre-allocate all of the GPU memory, 0.5 means the process - // allocates ~50% of the available GPU memory. + // Fraction of the available GPU memory to allocate for each process. + // 1 means to allocate all of the GPU memory, 0.5 means the process + // allocates up to ~50% of the available GPU memory. + // + // GPU memory is pre-allocated unless the allow_growth option is enabled. + // + // If greater than 1.0, uses CUDA unified memory to potentially oversubscribe + // the amount of memory available on the GPU device by using host memory as a + // swap space. Accessing memory not available on the device will be + // significantly slower as that would require memory transfer between the host + // and the device. Options to reduce the memory requirement should be + // considered before enabling this option as this may come with a negative + // performance impact. Oversubscription using the unified memory requires + // Pascal class or newer GPUs and it is currently only supported on the Linux + // operating system. See + // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#um-requirements + // for the detailed requirements. double per_process_gpu_memory_fraction = 1; + // If true, the allocator does not pre-allocate the entire specified + // GPU memory region, instead starting small and growing as needed. + bool allow_growth = 4; + // The type of GPU allocation strategy to use. 
// // Allowed values: @@ -35,10 +52,6 @@ message GPUOptions { // a reasonable default (several MBs). int64 deferred_deletion_bytes = 3; - // If true, the allocator does not pre-allocate the entire specified - // GPU memory region, instead starting small and growing as needed. - bool allow_growth = 4; - // A comma-separated list of GPU ids that determines the 'visible' // to 'virtual' mapping of GPU devices. For example, if TensorFlow // can see 8 GPU devices in the process, and one wanted to map @@ -82,9 +95,6 @@ message GPUOptions { // the overall host system performance. bool force_gpu_compatible = 8; - // Everything inside Experimental is subject to change and is not subject - // to API stability guarantees in - // https://www.tensorflow.org/programmers_guide/version_compat. message Experimental { // Configuration for breaking down a visible GPU into multiple "virtual" // devices. @@ -124,8 +134,20 @@ message GPUOptions { // different settings in different sessions within same process will // result in undefined behavior. repeated VirtualDevices virtual_devices = 1; + + // If true, uses CUDA unified memory for memory allocations. If + // per_process_gpu_memory_fraction option is greater than 1.0, then unified + // memory is used regardless of the value for this field. See comments for + // per_process_gpu_memory_fraction field for more details and requirements + // of the unified memory. This option is useful to oversubscribe memory if + // multiple processes are sharing a single GPU while individually using less + // than 1.0 per process memory fraction. + bool use_unified_memory = 2; } + // Everything inside experimental is subject to change and is not subject + // to API stability guarantees in + // https://www.tensorflow.org/programmers_guide/version_compat. Experimental experimental = 9; }; |