about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/core/kernels/sendrecv_ops.h
diff options
context:
space:
mode:
author: Derek Murray <mrry@google.com> 2017-06-05 10:47:38 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-06-05 10:51:14 -0700
commit: 9f932e6ce6061d251ed8e6b038fe3864a9b4d07a (patch)
tree: 3f2615800d119355b7f88df43ab0f5be0254ff7d /tensorflow/core/kernels/sendrecv_ops.h
parent: cc2dd4ac8538045e94e3f8fe4fb1c532f67c1844 (diff)
Avoid parsing a rendezvous key for Send/Recv ops outside a loop.
For such ops, the rendezvous key will be constant, because `ctx->frame_iter()` will always evaluate to `{0, 0}`. Benchmarking reveals that this can save between 1 and 2 microseconds per Send or Recv op execution. The optimization applies to all cross-process, inter-device, and intra-device (host-to/from-device memory) Send/Recv ops.

PiperOrigin-RevId: 158032522
Diffstat (limited to 'tensorflow/core/kernels/sendrecv_ops.h')
-rw-r--r-- tensorflow/core/kernels/sendrecv_ops.h 2
1 files changed, 2 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/sendrecv_ops.h b/tensorflow/core/kernels/sendrecv_ops.h
index 6e91422682..67867e3308 100644
--- a/tensorflow/core/kernels/sendrecv_ops.h
+++ b/tensorflow/core/kernels/sendrecv_ops.h
@@ -28,6 +28,7 @@ class SendOp : public OpKernel {
private:
string key_prefix_;
+ Rendezvous::ParsedKey parsed_key_;
TF_DISALLOW_COPY_AND_ASSIGN(SendOp);
};
@@ -39,6 +40,7 @@ class RecvOp : public AsyncOpKernel {
private:
string key_prefix_;
+ Rendezvous::ParsedKey parsed_key_;
TF_DISALLOW_COPY_AND_ASSIGN(RecvOp);
};