author     2017-10-11 11:01:29 -0700
committer  2017-10-11 11:05:35 -0700
commit     4a139397e8f4b3cbd50240cfd914bac9db476965 (patch)
tree       cfdae90415e50478f4788fbfda88bca4ffe6c6c1
parent     4d69d0408da946096163ee1d8ea068ae6698ae9d (diff)
More Variant cross-device support:
* Remove HostConstraint for ops taking Variants; they can now be copied from/to Device.
* Add ResourceVariable assign operations that support variants.
PiperOrigin-RevId: 171845029
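
For context on the first bullet: until this change, GPU kernels taking DT_VARIANT inputs pinned those inputs and outputs to host memory (the "HostConstraint"), because Variant tensors could not be copied between devices. The Conj registration from this diff shows the before/after pattern; the same change is applied to AddN, ZerosLike, Shape, Rank, and Size below.

// Before: Variant input/output pinned to host memory, since Variants
// had no cross-device copy support.
REGISTER_KERNEL_BUILDER(Name("Conj")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<Variant>("T")
                            .HostMemory("input")
                            .HostMemory("output"),
                        UnaryVariantOp<GPUDevice, CONJ_VARIANT_UNARY_OP>);

// After: no HostMemory constraints; the Variant tensor may be
// device-resident and is copied from/to the device as needed.
REGISTER_KERNEL_BUILDER(
    Name("Conj").Device(DEVICE_GPU).TypeConstraint<Variant>("T"),
    UnaryVariantOp<GPUDevice, CONJ_VARIANT_UNARY_OP>);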
-rw-r--r--  tensorflow/core/framework/register_types.h        |  4
-rw-r--r--  tensorflow/core/kernels/aggregate_ops.cc           | 10
-rw-r--r--  tensorflow/core/kernels/constant_op.cc             |  9
-rw-r--r--  tensorflow/core/kernels/cwise_op_conj.cc           | 11
-rw-r--r--  tensorflow/core/kernels/resource_variable_ops.cc   | 53
-rw-r--r--  tensorflow/core/kernels/shape_op_test.cc           |  4
-rw-r--r--  tensorflow/core/kernels/shape_ops.cc               | 44
-rw-r--r--  tensorflow/python/ops/state_ops.py                 |  2
8 files changed, 68 insertions, 69 deletions
diff --git a/tensorflow/core/framework/register_types.h b/tensorflow/core/framework/register_types.h
index 3f9c307d03..61e722e57b 100644
--- a/tensorflow/core/framework/register_types.h
+++ b/tensorflow/core/framework/register_types.h
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/core/framework/numeric_types.h"
 #include "tensorflow/core/framework/resource_handle.h"
+#include "tensorflow/core/framework/variant.h"
 #include "tensorflow/core/platform/types.h"
 
 // Two sets of macros:
@@ -67,6 +68,7 @@ limitations under the License.
 #define TF_CALL_int8(m) m(::tensorflow::int8)
 #define TF_CALL_string(m) m(string)
 #define TF_CALL_resource(m) m(::tensorflow::ResourceHandle)
+#define TF_CALL_variant(m) m(::tensorflow::Variant)
 #define TF_CALL_complex64(m) m(::tensorflow::complex64)
 #define TF_CALL_int64(m) m(::tensorflow::int64)
 #define TF_CALL_uint64(m) m(::tensorflow::uint64)
@@ -96,6 +98,7 @@ limitations under the License.
 #define TF_CALL_int8(m)
 #define TF_CALL_string(m)
 #define TF_CALL_resource(m)
+#define TF_CALL_variant(m)
 #define TF_CALL_complex64(m)
 #define TF_CALL_int64(m) m(::tensorflow::int64)
 #define TF_CALL_uint64(m)
@@ -125,6 +128,7 @@ limitations under the License.
 #define TF_CALL_int8(m)
 #define TF_CALL_string(m)
 #define TF_CALL_resource(m)
+#define TF_CALL_variant(m)
 #define TF_CALL_complex64(m)
 #define TF_CALL_int64(m)
 #define TF_CALL_uint64(m)
diff --git a/tensorflow/core/kernels/aggregate_ops.cc b/tensorflow/core/kernels/aggregate_ops.cc
index 0099984f69..2f125312d0 100644
--- a/tensorflow/core/kernels/aggregate_ops.cc
+++ b/tensorflow/core/kernels/aggregate_ops.cc
@@ -229,6 +229,7 @@ REGISTER_ADDN_CPU(Variant);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_ADDN_GPU);
 TF_CALL_complex64(REGISTER_ADDN_GPU);
 TF_CALL_complex128(REGISTER_ADDN_GPU);
+TF_CALL_variant(REGISTER_ADDN_GPU);
 #undef REGISTER_ADDN_GPU
 
 // A special GPU kernel for int32.
@@ -241,15 +242,6 @@ REGISTER_KERNEL_BUILDER(Name("AddN")
                             .HostMemory("sum"),
                         AddNOp<CPUDevice, int32>);
 
-// TODO(ebrevdo): Once rendezvous has been properly set up for
-// Variants, we'll no longer need a HostMemory attribute for this case.
-REGISTER_KERNEL_BUILDER(Name("AddN")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<Variant>("T")
-                            .HostMemory("inputs")
-                            .HostMemory("sum"),
-                        AddNOp<GPUDevice, Variant>);
-
 #endif  // GOOGLE_CUDA
 
 #ifdef TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc
index 018ace5485..72132574a4 100644
--- a/tensorflow/core/kernels/constant_op.cc
+++ b/tensorflow/core/kernels/constant_op.cc
@@ -333,19 +333,12 @@ REGISTER_KERNEL(double, GPU);
 REGISTER_KERNEL(complex64, GPU);
 REGISTER_KERNEL(complex128, GPU);
 REGISTER_KERNEL(int64, GPU);
+REGISTER_KERNEL(Variant, GPU);
 REGISTER_KERNEL_BUILDER(Name("ZerosLike")
                             .Device(DEVICE_GPU)
                             .TypeConstraint<int32>("T")
                             .HostMemory("y"),
                         ZerosLikeOp<CPUDevice, int32>);
-// TODO(ebrevdo): Once rendezvous has been properly set up for
-// Variants, we'll no longer need a HostMemory attribute for this case.
-REGISTER_KERNEL_BUILDER(Name("ZerosLike")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<Variant>("T")
-                            .HostMemory("x")
-                            .HostMemory("y"),
-                        ZerosLikeOp<GPUDevice, Variant>);
 #endif  // GOOGLE_CUDA
 
 #undef REGISTER_KERNEL
diff --git a/tensorflow/core/kernels/cwise_op_conj.cc b/tensorflow/core/kernels/cwise_op_conj.cc
index 2ab8c42c53..929c54a9a1 100644
--- a/tensorflow/core/kernels/cwise_op_conj.cc
+++ b/tensorflow/core/kernels/cwise_op_conj.cc
@@ -23,14 +23,9 @@ REGISTER2(UnaryOp, CPU, "Conj", functor::conj, complex64, complex128);
 REGISTER_VARIANT(UnaryVariantOp, CPU, "Conj", CONJ_VARIANT_UNARY_OP);
 
 #if GOOGLE_CUDA
-// TODO(ebrevdo): Once rendezvous has been properly set up for
-// Variants, we'll no longer need a HostMemory attribute for this case.
-REGISTER_KERNEL_BUILDER(Name("Conj")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<Variant>("T")
-                            .HostMemory("input")
-                            .HostMemory("output"),
-                        UnaryVariantOp<GPUDevice, CONJ_VARIANT_UNARY_OP>);
+REGISTER_KERNEL_BUILDER(
+    Name("Conj").Device(DEVICE_GPU).TypeConstraint<Variant>("T"),
+    UnaryVariantOp<GPUDevice, CONJ_VARIANT_UNARY_OP>);
 REGISTER_KERNEL_BUILDER(
     Name("Conj").Device(DEVICE_GPU).TypeConstraint<complex64>("T"),
     UnaryOp<GPUDevice, functor::conj<complex64>>);
diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc
index e45abb6c56..3cca493972 100644
--- a/tensorflow/core/kernels/resource_variable_ops.cc
+++ b/tensorflow/core/kernels/resource_variable_ops.cc
@@ -127,7 +127,7 @@ REGISTER_KERNEL_BUILDER(
           .Device(DEVICE_GPU)                   \
           .HostMemory("resource")               \
           .TypeConstraint<type>("dtype"),       \
-      ResourceHandleOp<Var>)                    \
+      ResourceHandleOp<Var>)
 
 TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS);
 #undef REGISTER_GPU_KERNELS
@@ -272,6 +272,56 @@ class AssignVariableOp : public OpKernel {
   DataType dtype_;
 };
 
+template <typename Device>
+class AssignVariableOp<Device, Variant> : public OpKernel {
+ public:
+  explicit AssignVariableOp(OpKernelConstruction* c) : OpKernel(c) {
+    OP_REQUIRES_OK(c, c->GetAttr("dtype", &dtype_));
+    OP_REQUIRES(c, dtype_ == DT_VARIANT,
+                errors::Internal("Variant kernel called with dtype: ",
+                                 DataTypeString(dtype_)));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& value = context->input(1);
+    OP_REQUIRES(context, dtype_ == value.dtype(),
+                errors::InvalidArgument(
+                    "Variable and value dtypes don't match; respectively, ",
+                    dtype_, " and ", context->input(1).dtype()));
+
+    Var* variable = nullptr;
+    OP_REQUIRES_OK(context, LookupOrCreateResource<Var>(
+                                context, HandleFromInput(context, 0),
+                                &variable, [this, context](Var** ptr) {
+                                  *ptr = new Var(dtype_);
+                                  // Create an empty new Variant tensor.
+                                  return Status::OK();
+                                }));
+    core::ScopedUnref s(variable);
+
+    OP_REQUIRES(context, variable->tensor()->dtype() == DT_VARIANT,
+                errors::InvalidArgument(
+                    "Trying to assign variable with wrong dtype. Expected ",
+                    DataTypeString(variable->tensor()->dtype()), " got ",
+                    DataTypeString(DT_VARIANT)));
+
+    mutex_lock ml(*variable->mu());
+    // TODO(ebrevdo): Add a proper Variant deep copy / assign registry
+    // entry and use that here.  For now, use a serialization
+    // roundtrip to perform the copy on CPU.  This is OK because this
+    // op is not registered for GPU.
+    *variable->tensor() = Tensor();
+    TensorProto tmp;
+    value.AsProtoTensorContent(&tmp);
+    OP_REQUIRES(context, variable->tensor()->FromProto(tmp),
+                errors::Internal("Could not properly reserialize values "
+                                 "Variant.  Check logs for more details."));
+  }
+
+ private:
+  DataType dtype_;
+};
+
 #define REGISTER_KERNELS(type)                     \
   REGISTER_KERNEL_BUILDER(Name("AssignVariableOp") \
                               .Device(DEVICE_CPU)  \
@@ -280,6 +330,7 @@ class AssignVariableOp : public OpKernel {
 
 TF_CALL_ALL_TYPES(REGISTER_KERNELS);
 TF_CALL_QUANTIZED_TYPES(REGISTER_KERNELS);
+TF_CALL_variant(REGISTER_KERNELS);
 #undef REGISTER_KERNELS
 
 #if GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/shape_op_test.cc b/tensorflow/core/kernels/shape_op_test.cc
index 96eaa4ac75..a545fb146c 100644
--- a/tensorflow/core/kernels/shape_op_test.cc
+++ b/tensorflow/core/kernels/shape_op_test.cc
@@ -68,7 +68,9 @@ static void ExpectHasError(const Status& s, const string& substr) {
 }
 
 TEST_F(ShapeOpTest, Simple) {
-  Scope root = Scope::NewRootScope();
+  // Ensure the ops run on CPU, as we have no device copy registration
+  // for NoKnownShape and KnownVecSize objects.
+  Scope root = Scope::NewRootScope().WithDevice("/cpu:0");
 
   // Use a placeholder so the graph optimizer doesn't optimize away
   // the shape function.
diff --git a/tensorflow/core/kernels/shape_ops.cc b/tensorflow/core/kernels/shape_ops.cc
index 98cd208576..721f9b949b 100644
--- a/tensorflow/core/kernels/shape_ops.cc
+++ b/tensorflow/core/kernels/shape_ops.cc
@@ -84,6 +84,7 @@ REGISTER_KERNEL_BUILDER(Name("Shape")
 
 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
 TF_CALL_bool(REGISTER_GPU_KERNEL);
+TF_CALL_variant(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 
 // A special GPU kernel for int32.
@@ -104,23 +105,6 @@ REGISTER_KERNEL_BUILDER(Name("Shape")
                             .TypeConstraint<int64>("out_type"),
                         ShapeOp<int64>);
 
-// TODO(ebrevdo): Once rendezvous has been properly set up for
-// Variants, we'll no longer need a HostMemory attribute for this case.
-REGISTER_KERNEL_BUILDER(Name("Shape")
-                            .Device(DEVICE_GPU)
-                            .HostMemory("input")
-                            .HostMemory("output")
-                            .TypeConstraint<Variant>("T")
-                            .TypeConstraint<int32>("out_type"),
-                        ShapeOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("Shape")
-                            .Device(DEVICE_GPU)
-                            .HostMemory("input")
-                            .HostMemory("output")
-                            .TypeConstraint<Variant>("T")
-                            .TypeConstraint<int64>("out_type"),
-                        ShapeOp<int64>);
-
 #endif  // GOOGLE_CUDA
 
 // ShapeN ---------------------------------------
@@ -245,6 +229,7 @@ REGISTER_KERNEL_BUILDER(Name("Rank")
                             .HostMemory("output"), \
                         RankOp);
 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
+TF_CALL_variant(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 
 // A special GPU kernel for int32 and bool.
@@ -264,14 +249,6 @@ REGISTER_KERNEL_BUILDER(Name("Rank")
                             .HostMemory("output"),
                         RankOp);
 
-// TODO(ebrevdo): Once rendezvous has been properly set up for
-// Variants, we'll no longer need a HostMemory attribute for this case.
-REGISTER_KERNEL_BUILDER(Name("Rank")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<Variant>("T")
-                            .HostMemory("input")
-                            .HostMemory("output"),
-                        RankOp);
 #endif  // GOOGLE_CUDA
 
 // Size ------------------------------------------
@@ -302,6 +279,7 @@ REGISTER_KERNEL_BUILDER(Name("Size")
                         SizeOp<int64>);
 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
 TF_CALL_bool(REGISTER_GPU_KERNEL);
+TF_CALL_variant(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 
 // A special GPU kernel for int32.
@@ -322,22 +300,6 @@ REGISTER_KERNEL_BUILDER(Name("Size")
                             .HostMemory("output"),
                         SizeOp<int64>);
 
-// TODO(ebrevdo): Once rendezvous has been properly set up for
-// Variants, we'll no longer need a HostMemory attribute for this case.
-REGISTER_KERNEL_BUILDER(Name("Size")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<Variant>("T")
-                            .TypeConstraint<int32>("out_type")
-                            .HostMemory("input")
-                            .HostMemory("output"),
-                        SizeOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("Size")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<Variant>("T")
-                            .TypeConstraint<int64>("out_type")
-                            .HostMemory("input")
-                            .HostMemory("output"),
-                        SizeOp<int64>);
 #endif  // GOOGLE_CUDA
 
 #ifdef TENSORFLOW_USE_SYCL
diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py
index f54bbfe90e..65ec2d4b77 100644
--- a/tensorflow/python/ops/state_ops.py
+++ b/tensorflow/python/ops/state_ops.py
@@ -124,7 +124,7 @@ def variable_op_v2(shape, dtype, name="Variable", container="", shared_name=""):
       with this shared_name. Otherwise, the node name is used instead.
 
   Returns:
-    A variable tensor.1;5A
+    A variable tensor.
   """
   return gen_state_ops._variable_v2(shape=shape,
                                     dtype=dtype,