aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/protobuf
diff options
context:
space:
mode:
authorGravatar Asim Shankar <ashankar@google.com>2018-06-26 18:51:41 -0700
committerGravatar Gunhan Gulsoy <gunan@google.com>2018-06-28 21:37:43 -0700
commita1d6179adb1ca6208281ed955860c319525edf75 (patch)
treed59762033c0784b638c89304f3b3216a2bb7ce20 /tensorflow/core/protobuf
parent3336574287a16a0ead083a33b5e80a1c7204fa62 (diff)
[C++]: Ability to feed and fetch tensors while keeping them in device memory
when using Session::RunCallable(). PiperOrigin-RevId: 202234757
Diffstat (limited to 'tensorflow/core/protobuf')
-rw-r--r--tensorflow/core/protobuf/config.proto64
1 file changed, 63 insertions, 1 deletion
diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto
index d83215d5c2..7ea422187d 100644
--- a/tensorflow/core/protobuf/config.proto
+++ b/tensorflow/core/protobuf/config.proto
@@ -490,5 +490,67 @@ message CallableOptions {
// in the callable.
repeated TensorConnection tensor_connection = 5;
- // Next: 6
+ // The Tensor objects fed in the callable and fetched from the callable
+ // are expected to be backed by host (CPU) memory by default.
+ //
+ // The options below allow changing that - feeding tensors backed by
+ // device memory, or returning tensors that are backed by device memory.
+ //
+ // The maps below map the name of a feed/fetch tensor (which appears in
+ // 'feed' or 'fetch' fields above), to the fully qualified name of the device
+ // owning the memory backing the contents of the tensor.
+ //
+ // For example, creating a callable with the following options:
+ //
+ // CallableOptions {
+ // feed: "a:0"
+ // feed: "b:0"
+ //
+ // fetch: "x:0"
+ // fetch: "y:0"
+ //
+ // feed_devices: {
+ // "a:0": "/job:localhost/replica:0/task:0/device:GPU:0"
+ // }
+ //
+ // fetch_devices: {
+ // "y:0": "/job:localhost/replica:0/task:0/device:GPU:0"
+ // }
+ // }
+ //
+ // means that the Callable expects:
+ // - The first argument ("a:0") is a Tensor backed by GPU memory.
+ // - The second argument ("b:0") is a Tensor backed by host memory.
+ // and of its return values:
+ // - The first output ("x:0") will be backed by host memory.
+ // - The second output ("y:0") will be backed by GPU memory.
+ //
+ // FEEDS:
+ // It is the responsibility of the caller to ensure that the memory of the fed
+ // tensors will be correctly initialized and synchronized before it is
+ // accessed by operations executed during the call to Session::RunCallable().
+ //
+ // This is typically ensured by using the TensorFlow memory allocators
+ // (Device::GetAllocator()) to create the Tensor to be fed.
+ //
+ // Alternatively, for CUDA-enabled GPU devices, this typically means that the
+ // operation that produced the contents of the tensor has completed, i.e., the
+ // CUDA stream has been synchronized (e.g., via cuCtxSynchronize() or
+ // cuStreamSynchronize()).
+ map<string, string> feed_devices = 6;
+ map<string, string> fetch_devices = 7;
+
+ // By default, RunCallable() will synchronize the GPU stream before returning
+ // fetched tensors on a GPU device, to ensure that the values in those tensors
+ // have been produced. This simplifies interacting with the tensors, but
+ // potentially incurs a performance hit.
+ //
+ // If this option is set to true, the caller is responsible for ensuring
+ // that the values in the fetched tensors have been produced before they are
+ // used. The caller can do this by invoking `Device::Sync()` on the underlying
+ // device(s), or by feeding the tensors back to the same Session using
+ // `feed_devices` with the same corresponding device name.
+ bool fetch_skip_sync = 8;
+
+ // Next: 9
}