diff options
author | Asim Shankar <ashankar@google.com> | 2018-06-26 18:51:41 -0700 |
---|---|---|
committer | Gunhan Gulsoy <gunan@google.com> | 2018-06-28 21:37:43 -0700 |
commit | a1d6179adb1ca6208281ed955860c319525edf75 (patch) | |
tree | d59762033c0784b638c89304f3b3216a2bb7ce20 /tensorflow/core/protobuf | |
parent | 3336574287a16a0ead083a33b5e80a1c7204fa62 (diff) |
[C++]: Ability to feed and fetch tensors while keeping them in device memory
when using Session::RunCallable().
PiperOrigin-RevId: 202234757
Diffstat (limited to 'tensorflow/core/protobuf')
-rw-r--r-- | tensorflow/core/protobuf/config.proto | 64 |
1 files changed, 63 insertions, 1 deletions
diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index d83215d5c2..7ea422187d 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -490,5 +490,67 @@ message CallableOptions { // in the callable. repeated TensorConnection tensor_connection = 5; - // Next: 6 + // The Tensor objects fed in the callable and fetched from the callable + // are expected to be backed by host (CPU) memory by default. + // + // The options below allow changing that - feeding tensors backed by + // device memory, or returning tensors that are backed by device memory. + // + // The maps below map the name of a feed/fetch tensor (which appears in + // 'feed' or 'fetch' fields above), to the fully qualified name of the device + // owning the memory backing the contents of the tensor. + // + // For example, creating a callable with the following options: + // + // CallableOptions { + // feed: "a:0" + // feed: "b:0" + // + // fetch: "x:0" + // fetch: "y:0" + // + // feed_devices: { + // "a:0": "/job:localhost/replica:0/task:0/device:GPU:0" + // } + // + // fetch_devices: { + // "y:0": "/job:localhost/replica:0/task:0/device:GPU:0" + // } + // } + // + // means that the Callable expects: + // - The first argument ("a:0") is a Tensor backed by GPU memory. + // - The second argument ("b:0") is a Tensor backed by host memory. + // and of its return values: + // - The first output ("x:0") will be backed by host memory. + // - The second output ("y:0") will be backed by GPU memory. + // + // FEEDS: + // It is the responsibility of the caller to ensure that the memory of the fed + // tensors will be correctly initialized and synchronized before it is + // accessed by operations executed during the call to Session::RunCallable(). + // + // This is typically ensured by using the TensorFlow memory allocators + // (Device::GetAllocator()) to create the Tensor to be fed. 
+ // + // Alternatively, for CUDA-enabled GPU devices, this typically means that the + // operation that produced the contents of the tensor has completed, i.e., the + // CUDA stream has been synchronized (e.g., via cuCtxSynchronize() or + // cuStreamSynchronize()). map<string, string> feed_devices = 6; map<string, string> fetch_devices = 7; + + // By default, RunCallable() will synchronize the GPU stream before returning + // fetched tensors on a GPU device, to ensure that the values in those tensors + // have been produced. This simplifies interacting with the tensors, but + // potentially incurs a performance hit. + // + // If this option is set to true, the caller is responsible for ensuring + // that the values in the fetched tensors have been produced before they are + // used. The caller can do this by invoking `Device::Sync()` on the underlying + // device(s), or by feeding the tensors back to the same Session using + // `feed_devices` with the same corresponding device name. + bool fetch_skip_sync = 8; + + // Next: 9 } |