author    Eugene Brevdo <ebrevdo@google.com>    2017-09-08 11:14:21 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>    2017-09-08 11:18:21 -0700
commit    477a221a2ffee7261220ad6c0f4f8c76a5eb7931 (patch)
tree      280e78e8f928390d886e6b0e5a03a71c588a51a5 /tensorflow/core/kernels/aggregate_ops.cc
parent    96828c9f5276a759717e0d9574b34bcd456d11a5 (diff)
Modify variant registry to have UnaryOp and BinaryOp registrations. Speed up registry lookup.
* Op type is described as an enum (separate enums for unary and binary ops).
* Modified ZerosLike registrations to unary registrations with the ZEROS_LIKE enum.
* Added Add(a, b) registrations as binary registrations with the ADD enum.
* The AddN op now uses the ADD BinaryOp registrations, and the ZerosLike op now uses the ZEROS_LIKE UnaryOp registrations.
* Modified the registry tables' keys from string to StringPiece. The reduced copying should speed up registry lookups by ops; this required creating a backing store for the device and type_name strings passed in at registration (see the sketch after the diffstat below).

PiperOrigin-RevId: 168020449
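A rough sketch of the two registration kinds the message describes. The payload type, function names, and the macro/enum spellings REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION, REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION, and ZEROS_LIKE_VARIANT_UNARY_OP are assumptions for illustration, not verbatim API from variant_op_registry.h; only ADD_VARIANT_BINARY_OP and BinaryOpVariants are confirmed by the diff below.

#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/variant_op_registry.h"

namespace tensorflow {

// A toy payload carried inside a Variant. Encode()/Decode() are elided;
// a real payload would define them for serialization.
struct MyVariantPayload {
  float value = 0.f;
  string TypeName() const { return "MyVariantPayload"; }
};

// Unary ZEROS_LIKE function: produce a zeroed payload of the same type.
Status MyPayloadZerosLike(OpKernelContext* ctx, const MyVariantPayload& v,
                          MyVariantPayload* out) {
  out->value = 0.f;
  return Status::OK();
}

// Binary ADD function: AddN folds its variant inputs through this.
Status MyPayloadAdd(OpKernelContext* ctx, const MyVariantPayload& a,
                    const MyVariantPayload& b, MyVariantPayload* out) {
  out->value = a.value + b.value;
  return Status::OK();
}

// Assumed registration macro shapes (op enum, device, type, type_name, fn):
REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION(ZEROS_LIKE_VARIANT_UNARY_OP,
                                         DEVICE_CPU, MyVariantPayload,
                                         "MyVariantPayload",
                                         MyPayloadZerosLike);
REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION(ADD_VARIANT_BINARY_OP, DEVICE_CPU,
                                          MyVariantPayload, "MyVariantPayload",
                                          MyPayloadAdd);

}  // namespace tensorflow

With an ADD registration like this in place, the variant AddN kernel added in the diff below can fold scalar variant inputs of this type.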
Diffstat (limited to 'tensorflow/core/kernels/aggregate_ops.cc')
-rw-r--r--  tensorflow/core/kernels/aggregate_ops.cc  78
1 file changed, 76 insertions, 2 deletions
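A standalone sketch of the lookup-path change described in the last bullet of the commit message, with std::string_view standing in for tensorflow::StringPiece (this is the concept, not TensorFlow's actual registry code): the table is keyed by views, and registration persists each name in a backing store so the view keys stay valid. Lookups then never materialize a temporary std::string.

#include <deque>
#include <iostream>
#include <string>
#include <string_view>
#include <unordered_map>

class VariantRegistrySketch {
 public:
  using AddFn = int (*)(int, int);

  // Copy the name once into the backing store; key the table with a view
  // into that stored string. std::deque never relocates its elements, so
  // the views stay valid as more registrations arrive.
  void Register(std::string_view type_name, AddFn fn) {
    backing_store_.emplace_back(type_name);
    table_[std::string_view(backing_store_.back())] = fn;
  }

  // Callers pass a view, so a lookup allocates nothing.
  AddFn Lookup(std::string_view type_name) const {
    auto it = table_.find(type_name);
    return it == table_.end() ? nullptr : it->second;
  }

 private:
  std::deque<std::string> backing_store_;
  std::unordered_map<std::string_view, AddFn> table_;
};

int AddInts(int a, int b) { return a + b; }

int main() {
  VariantRegistrySketch registry;
  registry.Register("MyVariantPayload", &AddInts);
  auto fn = registry.Lookup("MyVariantPayload");
  if (fn) std::cout << fn(2, 3) << "\n";  // prints 5
}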
diff --git a/tensorflow/core/kernels/aggregate_ops.cc b/tensorflow/core/kernels/aggregate_ops.cc
index 0aa65729de..0099984f69 100644
--- a/tensorflow/core/kernels/aggregate_ops.cc
+++ b/tensorflow/core/kernels/aggregate_ops.cc
@@ -24,6 +24,9 @@ limitations under the License.
#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/variant.h"
+#include "tensorflow/core/framework/variant_encode_decode.h"
+#include "tensorflow/core/framework/variant_op_registry.h"
#include "tensorflow/core/lib/gtl/inlined_vector.h"
#include "tensorflow/core/platform/logging.h"
@@ -33,7 +36,7 @@ typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;
#ifdef TENSORFLOW_USE_SYCL
typedef Eigen::SyclDevice SYCLDevice;
-#endif // TENSORFLOW_USE_SYCL
+#endif // TENSORFLOW_USE_SYCL
template <typename Device, typename T>
class AddNOp : public OpKernel {
@@ -150,6 +153,65 @@ class AddNOp : public OpKernel {
}
};
+template <typename Device>
+class AddNOp<Device, Variant> : public OpKernel {
+ public:
+ explicit AddNOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+ void Compute(OpKernelContext* ctx) override {
+ if (!ctx->ValidateInputsAreSameShape(this)) return;
+
+ const Tensor& input0 = ctx->input(0);
+ const int num = ctx->num_inputs();
+
+ if (num == 1) {
+ ctx->set_output(0, input0);
+ return;
+ }
+
+ for (int i = 0; i < num; ++i) {
+ // Step 1: ensure unary variants.
+ OP_REQUIRES(
+ ctx, ctx->input(i).dims() == 0,
+ errors::InvalidArgument(
+ "AddN of non-scalar Tensor with dtype=DT_VARIANT is not "
+ "supported; inputs[",
+ i, " has shape: ", ctx->input(i).shape().DebugString(), "."));
+ }
+
+ TensorShape common_shape;
+ OP_REQUIRES_OK(ctx, GetUnaryVariantShape(ctx->input(0), &common_shape));
+ // Step 2: access all variants and ensure shapes match.
+ for (int i = 1; i < num; ++i) {
+ TensorShape check_shape;
+ OP_REQUIRES_OK(ctx, GetUnaryVariantShape(ctx->input(i), &check_shape));
+ OP_REQUIRES(ctx, common_shape == check_shape,
+ errors::InvalidArgument(
+ "AddN of Variants of differing shapes; inputs[0] shape: ",
+ common_shape.DebugString(), ", inputs[", i,
+ "] shape: ", check_shape.DebugString()));
+ }
+
+ // Step 3: attempt to add using
+ // BinaryOpVariants(ADD_VARIANT_BINARY_OP, ...)
+ // For the output create a default-constructed variant object.
+ // TODO(ebrevdo): Perform summation in a tree-structure.
+ Tensor out(cpu_allocator(), DT_VARIANT, TensorShape({}));
+ Variant* v_out = &(out.scalar<Variant>()());
+ OP_REQUIRES_OK(
+ ctx, BinaryOpVariants<Device>(
+ ctx, ADD_VARIANT_BINARY_OP, ctx->input(0).scalar<Variant>()(),
+ ctx->input(1).scalar<Variant>()(), v_out));
+ for (int i = 2; i < num; ++i) {
+ const Variant tmp = std::move(*v_out);
+ const Variant& inp = ctx->input(i).scalar<Variant>()();
+ OP_REQUIRES_OK(ctx, BinaryOpVariants<Device>(ctx, ADD_VARIANT_BINARY_OP,
+ inp, tmp, v_out));
+ }
+ ctx->set_output(0, out);
+ }
+};
+
#define REGISTER_ADDN(type, dev) \
REGISTER_KERNEL_BUILDER( \
Name("AddN").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
@@ -158,6 +220,8 @@ class AddNOp : public OpKernel {
#define REGISTER_ADDN_CPU(type) REGISTER_ADDN(type, CPU)
TF_CALL_NUMBER_TYPES(REGISTER_ADDN_CPU);
+REGISTER_ADDN_CPU(Variant);
+
#undef REGISTER_ADDN_CPU
#if GOOGLE_CUDA
@@ -176,6 +240,16 @@ REGISTER_KERNEL_BUILDER(Name("AddN")
.HostMemory("inputs")
.HostMemory("sum"),
AddNOp<CPUDevice, int32>);
+
+// TODO(ebrevdo): Once rendezvous has been properly set up for
+// Variants, we'll no longer need a HostMemory attribute for this case.
+REGISTER_KERNEL_BUILDER(Name("AddN")
+ .Device(DEVICE_GPU)
+ .TypeConstraint<Variant>("T")
+ .HostMemory("inputs")
+ .HostMemory("sum"),
+ AddNOp<GPUDevice, Variant>);
+
#endif // GOOGLE_CUDA
#ifdef TENSORFLOW_USE_SYCL
@@ -191,7 +265,7 @@ REGISTER_KERNEL_BUILDER(Name("AddN")
.HostMemory("inputs")
.HostMemory("sum"),
AddNOp<CPUDevice, int32>);
-#endif // TENSORFLOW_USE_SYCL
+#endif // TENSORFLOW_USE_SYCL
#undef REGISTER_ADDN
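The variant specialization added above reduces its inputs with a sequential left fold: it seeds the accumulator with input0 + input1, then computes acc = input_i + acc for the rest, matching the argument order of BinaryOpVariants(ctx, ADD_VARIANT_BINARY_OP, inp, tmp, v_out). The TODO(ebrevdo) in the diff flags a tree-structured (logarithmic-depth) reduction as future work. A minimal sketch of the same reduction order, with plain ints standing in for Variants:

#include <cassert>
#include <cstddef>
#include <vector>

int FoldLeft(const std::vector<int>& inputs) {
  // Seed with in0 + in1, as the kernel does for the first BinaryOpVariants
  // call, then fold the remaining inputs in on the left.
  int acc = inputs[0] + inputs[1];
  for (std::size_t i = 2; i < inputs.size(); ++i) {
    acc = inputs[i] + acc;  // mirrors BinaryOpVariants(ctx, ADD, inp, tmp, out)
  }
  return acc;
}

int main() {
  assert(FoldLeft({1, 2, 3, 4}) == 10);
  return 0;
}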