diff options
author | 2017-09-08 11:14:21 -0700 | |
---|---|---|
committer | 2017-09-08 11:18:21 -0700 | |
commit | 477a221a2ffee7261220ad6c0f4f8c76a5eb7931 (patch) | |
tree | 280e78e8f928390d886e6b0e5a03a71c588a51a5 /tensorflow/core/kernels/aggregate_ops.cc | |
parent | 96828c9f5276a759717e0d9574b34bcd456d11a5 (diff) |
Modify variant registry to have UnaryOp and BinaryOp registrations. Speed up registry lookup.
* Op type is described as an enum (separate enums for unary and binary ops).
* Modified ZerosLike registrations to unary registrations with ZEROS_LIKE enum.
* Added Add(a,b) registrations as binary registrations with ADD enum.
* The AddN op now uses the ADD BinaryOp registrations, and the ZerosLike op
was modified to use the ZEROS_LIKE UnaryOp registrations.
* Modified the registry tables' keys from string type to StringPiece type.
The reduced copying should speed up registry lookups by ops. This required
creating a backing store for the device and type_name strings passed in at registration.
PiperOrigin-RevId: 168020449
Diffstat (limited to 'tensorflow/core/kernels/aggregate_ops.cc')
-rw-r--r-- | tensorflow/core/kernels/aggregate_ops.cc | 78 |
1 file changed, 76 insertions, 2 deletions
diff --git a/tensorflow/core/kernels/aggregate_ops.cc b/tensorflow/core/kernels/aggregate_ops.cc index 0aa65729de..0099984f69 100644 --- a/tensorflow/core/kernels/aggregate_ops.cc +++ b/tensorflow/core/kernels/aggregate_ops.cc @@ -24,6 +24,9 @@ limitations under the License. #include "tensorflow/core/framework/numeric_op.h" #include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/variant.h" +#include "tensorflow/core/framework/variant_encode_decode.h" +#include "tensorflow/core/framework/variant_op_registry.h" #include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/platform/logging.h" @@ -33,7 +36,7 @@ typedef Eigen::ThreadPoolDevice CPUDevice; typedef Eigen::GpuDevice GPUDevice; #ifdef TENSORFLOW_USE_SYCL typedef Eigen::SyclDevice SYCLDevice; -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL template <typename Device, typename T> class AddNOp : public OpKernel { @@ -150,6 +153,65 @@ class AddNOp : public OpKernel { } }; +template <typename Device> +class AddNOp<Device, Variant> : public OpKernel { + public: + explicit AddNOp(OpKernelConstruction* context) : OpKernel(context) {} + + void Compute(OpKernelContext* ctx) override { + if (!ctx->ValidateInputsAreSameShape(this)) return; + + const Tensor& input0 = ctx->input(0); + const int num = ctx->num_inputs(); + + if (num == 1) { + ctx->set_output(0, input0); + return; + } + + for (int i = 0; i < num; ++i) { + // Step 1: ensure unary variants. + OP_REQUIRES( + ctx, ctx->input(i).dims() == 0, + errors::InvalidArgument( + "AddN of non-scalar Tensor with dtype=DT_VARIANT is not " + "supported; inputs[", + i, " has shape: ", ctx->input(i).shape().DebugString(), ".")); + } + + TensorShape common_shape; + OP_REQUIRES_OK(ctx, GetUnaryVariantShape(ctx->input(0), &common_shape)); + // Step 2: access all variants and ensure shapes match. 
+ for (int i = 1; i < num; ++i) { + TensorShape check_shape; + OP_REQUIRES_OK(ctx, GetUnaryVariantShape(ctx->input(i), &check_shape)); + OP_REQUIRES(ctx, common_shape == check_shape, + errors::InvalidArgument( + "AddN of Variants of differing shapes; inputs[0] shape: ", + common_shape.DebugString(), ", inputs[", i, + "] shape: ", check_shape.DebugString())); + } + + // Step 3: attempt to add using + // BinaryOpVariants(ADD_VARIANT_BINARY_OP, ...) + // For the output create a default-constructed variant object. + // TODO(ebrevdo): Perform summation in a tree-structure. + Tensor out(cpu_allocator(), DT_VARIANT, TensorShape({})); + Variant* v_out = &(out.scalar<Variant>()()); + OP_REQUIRES_OK( + ctx, BinaryOpVariants<Device>( + ctx, ADD_VARIANT_BINARY_OP, ctx->input(0).scalar<Variant>()(), + ctx->input(1).scalar<Variant>()(), v_out)); + for (int i = 2; i < num; ++i) { + const Variant tmp = std::move(*v_out); + const Variant& inp = ctx->input(i).scalar<Variant>()(); + OP_REQUIRES_OK(ctx, BinaryOpVariants<Device>(ctx, ADD_VARIANT_BINARY_OP, + inp, tmp, v_out)); + } + ctx->set_output(0, out); + } +}; + #define REGISTER_ADDN(type, dev) \ REGISTER_KERNEL_BUILDER( \ Name("AddN").Device(DEVICE_##dev).TypeConstraint<type>("T"), \ @@ -158,6 +220,8 @@ class AddNOp : public OpKernel { #define REGISTER_ADDN_CPU(type) REGISTER_ADDN(type, CPU) TF_CALL_NUMBER_TYPES(REGISTER_ADDN_CPU); +REGISTER_ADDN_CPU(Variant); + #undef REGISTER_ADDN_CPU #if GOOGLE_CUDA @@ -176,6 +240,16 @@ REGISTER_KERNEL_BUILDER(Name("AddN") .HostMemory("inputs") .HostMemory("sum"), AddNOp<CPUDevice, int32>); + +// TODO(ebrevdo): Once rendezvous has been properly set up for +// Variants, we'll no longer need a HostMemory attribute for this case. 
+REGISTER_KERNEL_BUILDER(Name("AddN") + .Device(DEVICE_GPU) + .TypeConstraint<Variant>("T") + .HostMemory("inputs") + .HostMemory("sum"), + AddNOp<GPUDevice, Variant>); + #endif // GOOGLE_CUDA #ifdef TENSORFLOW_USE_SYCL @@ -191,7 +265,7 @@ REGISTER_KERNEL_BUILDER(Name("AddN") .HostMemory("inputs") .HostMemory("sum"), AddNOp<CPUDevice, int32>); -#endif // TENSORFLOW_USE_SYCL +#endif // TENSORFLOW_USE_SYCL #undef REGISTER_ADDN |