aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/kernels/resource_variable_ops.h
diff options
context:
space:
mode:
authorGravatar Peter Hawkins <phawkins@google.com>2018-09-26 13:48:21 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-09-26 13:51:50 -0700
commit1736e0bbbfdeeba178dff37c970b5a0180ee013f (patch)
tree390c309b5997a752644d2c50bb4ee5bf8fc1654d /tensorflow/core/kernels/resource_variable_ops.h
parent652ce1aaefdadd04a9905a0788ab26c6fff93658 (diff)
[TF] Add new internal ops _VarHandlesOp and _ReadVariablesOp.
The purpose of these ops is to fix a latency problem observed for an inference benchmark. Often an inference step starts by reading the values of many (hundreds of) weights. For a resource variable, this requires a VarHandleOp and a ReadVariableOp per variable. Running hundreds of trivial ops can add hundreds of microseconds of latency to the critical path of an inference step. The inter-op latency of the executor can be hundreds of nanoseconds, which rapidly adds up. This change introduces two fused ops, _VarHandlesOp and _ReadVariablesOp, that allow us to read many variables in a pair of larger ops, rather than many tiny ops. PiperOrigin-RevId: 214662338
Diffstat (limited to 'tensorflow/core/kernels/resource_variable_ops.h')
-rw-r--r--tensorflow/core/kernels/resource_variable_ops.h10
1 files changed, 10 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/resource_variable_ops.h b/tensorflow/core/kernels/resource_variable_ops.h
index 9b60106f13..cffb732c38 100644
--- a/tensorflow/core/kernels/resource_variable_ops.h
+++ b/tensorflow/core/kernels/resource_variable_ops.h
@@ -28,6 +28,16 @@ class ReadVariableOp : public OpKernel {
DataType dtype_;
};
+class ReadVariablesOp : public OpKernel {  // Kernel class for the fused _ReadVariablesOp, which the commit message describes as reading many variables in one op.
+ public:
+ explicit ReadVariablesOp(OpKernelConstruction* c);  // Declared only; the definition is not part of this header hunk.
+ void Compute(OpKernelContext* ctx) override;  // Overrides a base-class virtual; the body is not visible here.
+ bool IsExpensive() override { return false; }  // Always reports this kernel as not expensive.
+
+ private:
+ DataTypeVector dtypes_;  // NOTE(review): presumably one dtype per variable being read, filled in by the constructor -- confirm in the .cc file.
+};
+
class DestroyResourceOp : public OpKernel {
public:
explicit DestroyResourceOp(OpKernelConstruction* ctx);