diff options
author | Asim Shankar <ashankar@google.com> | 2018-06-26 18:51:41 -0700 |
---|---|---|
committer | Gunhan Gulsoy <gunan@google.com> | 2018-06-28 21:37:43 -0700 |
commit | a1d6179adb1ca6208281ed955860c319525edf75 (patch) | |
tree | d59762033c0784b638c89304f3b3216a2bb7ce20 /tensorflow/core/protobuf | |
parent | 3336574287a16a0ead083a33b5e80a1c7204fa62 (diff) |
[C++]: Ability to feed and fetch tensors while keeping them in device memory
when using Session::RunCallable().
PiperOrigin-RevId: 202234757
Diffstat (limited to 'tensorflow/core/protobuf')
-rw-r--r-- | tensorflow/core/protobuf/config.proto | 64 |
1 files changed, 63 insertions, 1 deletions
diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index d83215d5c2..7ea422187d 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -490,5 +490,67 @@ message CallableOptions { // in the callable. repeated TensorConnection tensor_connection = 5; - // Next: 6 + // The Tensor objects fed in the callable and fetched from the callable + // are expected to be backed by host (CPU) memory by default. + // + // The options below allow changing that - feeding tensors backed by + // device memory, or returning tensors that are backed by device memory. + // + // The maps below map the name of a feed/fetch tensor (which appears in + // 'feed' or 'fetch' fields above), to the fully qualified name of the device + // owning the memory backing the contents of the tensor. + // + // For example, creating a callable with the following options: + // + // CallableOptions { + // feed: "a:0" + // feed: "b:0" + // + // fetch: "x:0" + // fetch: "y:0" + // + // feed_devices: { + // "a:0": "/job:localhost/replica:0/task:0/device:GPU:0" + // } + // + // fetch_devices: { + // "y:0": "/job:localhost/replica:0/task:0/device:GPU:0" + // } + // } + // + // means that the Callable expects: + // - The first argument ("a:0") is a Tensor backed by GPU memory. + // - The second argument ("b:0") is a Tensor backed by host memory. + // and of its return values: + // - The first output ("x:0") will be backed by host memory. + // - The second output ("y:0") will be backed by GPU memory. + // + // FEEDS: + // It is the responsibility of the caller to ensure that the memory of the fed + // tensors will be correctly initialized and synchronized before it is + // accessed by operations executed during the call to Session::RunCallable(). + // + // This is typically ensured by using the TensorFlow memory allocators + // (Device::GetAllocator()) to create the Tensor to be fed. 
+ // + // Alternatively, for CUDA-enabled GPU devices, this typically means that the + // operation that produced the contents of the tensor has completed, i.e., the + // CUDA stream has been synchronized (e.g., via cuCtxSynchronize() or + // cuStreamSynchronize()). map<string, string> feed_devices = 6; map<string, string> fetch_devices = 7; + + // By default, RunCallable() will synchronize the GPU stream before returning + // fetched tensors on a GPU device, to ensure that the values in those tensors + // have been produced. This simplifies interacting with the tensors, but + // potentially incurs a performance hit. + // + // If this option is set to true, the caller is responsible for ensuring + // that the values in the fetched tensors have been produced before they are + // used. The caller can do this by invoking `Device::Sync()` on the underlying + // device(s), or by feeding the tensors back to the same Session using + // `feed_devices` with the same corresponding device name. + bool fetch_skip_sync = 8; + + // Next: 9 } |