author     Eugene Brevdo <ebrevdo@google.com>               2017-10-11 11:01:29 -0700
committer  TensorFlower Gardener <gardener@tensorflow.org>  2017-10-11 11:05:35 -0700
commit     4a139397e8f4b3cbd50240cfd914bac9db476965 (patch)
tree       cfdae90415e50478f4788fbfda88bca4ffe6c6c1
parent     4d69d0408da946096163ee1d8ea068ae6698ae9d (diff)
More Variant cross-device support:

* Remove the HostMemory constraints for ops taking Variants; they can now be copied from/to Device.
* Add ResourceVariable assign operations that support Variants.

PiperOrigin-RevId: 171845029
-rw-r--r--  tensorflow/core/framework/register_types.h        |  4
-rw-r--r--  tensorflow/core/kernels/aggregate_ops.cc          | 10
-rw-r--r--  tensorflow/core/kernels/constant_op.cc            |  9
-rw-r--r--  tensorflow/core/kernels/cwise_op_conj.cc          | 11
-rw-r--r--  tensorflow/core/kernels/resource_variable_ops.cc  | 53
-rw-r--r--  tensorflow/core/kernels/shape_op_test.cc          |  4
-rw-r--r--  tensorflow/core/kernels/shape_ops.cc              | 44
-rw-r--r--  tensorflow/python/ops/state_ops.py                |  2
8 files changed, 68 insertions(+), 69 deletions(-)
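
The recurring pattern across the kernels below: Variant GPU registrations lose their HostMemory pinning (previously required because Variant tensors could not be copied between host and device) and are instead stamped out through the new TF_CALL_variant macro. A minimal before/after sketch, taken from the AddN registration in this diff:

    // Before: Variant inputs/outputs pinned to host memory.
    REGISTER_KERNEL_BUILDER(Name("AddN")
                                .Device(DEVICE_GPU)
                                .TypeConstraint<Variant>("T")
                                .HostMemory("inputs")
                                .HostMemory("sum"),
                            AddNOp<GPUDevice, Variant>);

    // After: plain GPU registration, stamped out through the new type macro.
    TF_CALL_variant(REGISTER_ADDN_GPU);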
diff --git a/tensorflow/core/framework/register_types.h b/tensorflow/core/framework/register_types.h
index 3f9c307d03..61e722e57b 100644
--- a/tensorflow/core/framework/register_types.h
+++ b/tensorflow/core/framework/register_types.h
@@ -19,6 +19,7 @@ limitations under the License.
#include "tensorflow/core/framework/numeric_types.h"
#include "tensorflow/core/framework/resource_handle.h"
+#include "tensorflow/core/framework/variant.h"
#include "tensorflow/core/platform/types.h"
// Two sets of macros:
@@ -67,6 +68,7 @@ limitations under the License.
#define TF_CALL_int8(m) m(::tensorflow::int8)
#define TF_CALL_string(m) m(string)
#define TF_CALL_resource(m) m(::tensorflow::ResourceHandle)
+#define TF_CALL_variant(m) m(::tensorflow::Variant)
#define TF_CALL_complex64(m) m(::tensorflow::complex64)
#define TF_CALL_int64(m) m(::tensorflow::int64)
#define TF_CALL_uint64(m) m(::tensorflow::uint64)
@@ -96,6 +98,7 @@ limitations under the License.
#define TF_CALL_int8(m)
#define TF_CALL_string(m)
#define TF_CALL_resource(m)
+#define TF_CALL_variant(m)
#define TF_CALL_complex64(m)
#define TF_CALL_int64(m) m(::tensorflow::int64)
#define TF_CALL_uint64(m)
@@ -125,6 +128,7 @@ limitations under the License.
#define TF_CALL_int8(m)
#define TF_CALL_string(m)
#define TF_CALL_resource(m)
+#define TF_CALL_variant(m)
#define TF_CALL_complex64(m)
#define TF_CALL_int64(m)
#define TF_CALL_uint64(m)
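
TF_CALL_variant follows the existing TF_CALL_<type> convention: in the full build it expands its argument macro with ::tensorflow::Variant, while in the reduced macro sets further down it expands to nothing, so Variant registrations compile out there. A minimal sketch of how a kernel file consumes it; REGISTER_MY_GPU_KERNEL and MyOp are illustrative names only, the real registration macros appear in the kernel files below:

    // Hypothetical registration macro for a hypothetical op "MyOp".
    #define REGISTER_MY_GPU_KERNEL(type)                                \
      REGISTER_KERNEL_BUILDER(                                          \
          Name("MyOp").Device(DEVICE_GPU).TypeConstraint<type>("T"),    \
          MyOp<GPUDevice, type>)

    TF_CALL_variant(REGISTER_MY_GPU_KERNEL);  // -> REGISTER_MY_GPU_KERNEL(::tensorflow::Variant)

    #undef REGISTER_MY_GPU_KERNEL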
diff --git a/tensorflow/core/kernels/aggregate_ops.cc b/tensorflow/core/kernels/aggregate_ops.cc
index 0099984f69..2f125312d0 100644
--- a/tensorflow/core/kernels/aggregate_ops.cc
+++ b/tensorflow/core/kernels/aggregate_ops.cc
@@ -229,6 +229,7 @@ REGISTER_ADDN_CPU(Variant);
TF_CALL_GPU_NUMBER_TYPES(REGISTER_ADDN_GPU);
TF_CALL_complex64(REGISTER_ADDN_GPU);
TF_CALL_complex128(REGISTER_ADDN_GPU);
+TF_CALL_variant(REGISTER_ADDN_GPU);
#undef REGISTER_ADDN_GPU
// A special GPU kernel for int32.
@@ -241,15 +242,6 @@ REGISTER_KERNEL_BUILDER(Name("AddN")
.HostMemory("sum"),
AddNOp<CPUDevice, int32>);
-// TODO(ebrevdo): Once rendezvous has been properly set up for
-// Variants, we'll no longer need a HostMemory attribute for this case.
-REGISTER_KERNEL_BUILDER(Name("AddN")
- .Device(DEVICE_GPU)
- .TypeConstraint<Variant>("T")
- .HostMemory("inputs")
- .HostMemory("sum"),
- AddNOp<GPUDevice, Variant>);
-
#endif // GOOGLE_CUDA
#ifdef TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc
index 018ace5485..72132574a4 100644
--- a/tensorflow/core/kernels/constant_op.cc
+++ b/tensorflow/core/kernels/constant_op.cc
@@ -333,19 +333,12 @@ REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
REGISTER_KERNEL(int64, GPU);
+REGISTER_KERNEL(Variant, GPU);
REGISTER_KERNEL_BUILDER(Name("ZerosLike")
.Device(DEVICE_GPU)
.TypeConstraint<int32>("T")
.HostMemory("y"),
ZerosLikeOp<CPUDevice, int32>);
-// TODO(ebrevdo): Once rendezvous has been properly set up for
-// Variants, we'll no longer need a HostMemory attribute for this case.
-REGISTER_KERNEL_BUILDER(Name("ZerosLike")
- .Device(DEVICE_GPU)
- .TypeConstraint<Variant>("T")
- .HostMemory("x")
- .HostMemory("y"),
- ZerosLikeOp<GPUDevice, Variant>);
#endif // GOOGLE_CUDA
#undef REGISTER_KERNEL
diff --git a/tensorflow/core/kernels/cwise_op_conj.cc b/tensorflow/core/kernels/cwise_op_conj.cc
index 2ab8c42c53..929c54a9a1 100644
--- a/tensorflow/core/kernels/cwise_op_conj.cc
+++ b/tensorflow/core/kernels/cwise_op_conj.cc
@@ -23,14 +23,9 @@ REGISTER2(UnaryOp, CPU, "Conj", functor::conj, complex64, complex128);
REGISTER_VARIANT(UnaryVariantOp, CPU, "Conj", CONJ_VARIANT_UNARY_OP);
#if GOOGLE_CUDA
-// TODO(ebrevdo): Once rendezvous has been properly set up for
-// Variants, we'll no longer need a HostMemory attribute for this case.
-REGISTER_KERNEL_BUILDER(Name("Conj")
- .Device(DEVICE_GPU)
- .TypeConstraint<Variant>("T")
- .HostMemory("input")
- .HostMemory("output"),
- UnaryVariantOp<GPUDevice, CONJ_VARIANT_UNARY_OP>);
+REGISTER_KERNEL_BUILDER(
+ Name("Conj").Device(DEVICE_GPU).TypeConstraint<Variant>("T"),
+ UnaryVariantOp<GPUDevice, CONJ_VARIANT_UNARY_OP>);
REGISTER_KERNEL_BUILDER(
Name("Conj").Device(DEVICE_GPU).TypeConstraint<complex64>("T"),
UnaryOp<GPUDevice, functor::conj<complex64>>);
diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc
index e45abb6c56..3cca493972 100644
--- a/tensorflow/core/kernels/resource_variable_ops.cc
+++ b/tensorflow/core/kernels/resource_variable_ops.cc
@@ -127,7 +127,7 @@ REGISTER_KERNEL_BUILDER(
.Device(DEVICE_GPU) \
.HostMemory("resource") \
.TypeConstraint<type>("dtype"), \
- ResourceHandleOp<Var>) \
+ ResourceHandleOp<Var>)
TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS);
#undef REGISTER_GPU_KERNELS
@@ -272,6 +272,56 @@ class AssignVariableOp : public OpKernel {
DataType dtype_;
};
+template <typename Device>
+class AssignVariableOp<Device, Variant> : public OpKernel {
+ public:
+ explicit AssignVariableOp(OpKernelConstruction* c) : OpKernel(c) {
+ OP_REQUIRES_OK(c, c->GetAttr("dtype", &dtype_));
+ OP_REQUIRES(c, dtype_ == DT_VARIANT,
+ errors::Internal("Variant kernel called with dtype: ",
+ DataTypeString(dtype_)));
+ }
+
+ void Compute(OpKernelContext* context) override {
+ const Tensor& value = context->input(1);
+ OP_REQUIRES(context, dtype_ == value.dtype(),
+ errors::InvalidArgument(
+ "Variable and value dtypes don't match; respectively, ",
+ dtype_, " and ", context->input(1).dtype()));
+
+ Var* variable = nullptr;
+ OP_REQUIRES_OK(context, LookupOrCreateResource<Var>(
+ context, HandleFromInput(context, 0), &variable,
+ [this, context](Var** ptr) {
+ *ptr = new Var(dtype_);
+ // Create an empty new Variant tensor.
+ return Status::OK();
+ }));
+ core::ScopedUnref s(variable);
+
+ OP_REQUIRES(context, variable->tensor()->dtype() == DT_VARIANT,
+ errors::InvalidArgument(
+ "Trying to assign variable with wrong dtype. Expected ",
+ DataTypeString(variable->tensor()->dtype()), " got ",
+ DataTypeString(DT_VARIANT)));
+
+ mutex_lock ml(*variable->mu());
+ // TODO(ebrevdo): Add a proper Variant deep copy / assign registry
+ // entry and use that here. For now, use a serialization
+ // roundtrip to perform the copy on CPU. This is OK because this
+ // op is not registered for GPU.
+ *variable->tensor() = Tensor();
+ TensorProto tmp;
+ value.AsProtoTensorContent(&tmp);
+ OP_REQUIRES(context, variable->tensor()->FromProto(tmp),
+ errors::Internal("Could not properly reserialize values "
+ "Variant. Check logs for more details."));
+ }
+
+ private:
+ DataType dtype_;
+};
+
#define REGISTER_KERNELS(type) \
REGISTER_KERNEL_BUILDER(Name("AssignVariableOp") \
.Device(DEVICE_CPU) \
@@ -280,6 +330,7 @@ class AssignVariableOp : public OpKernel {
TF_CALL_ALL_TYPES(REGISTER_KERNELS);
TF_CALL_QUANTIZED_TYPES(REGISTER_KERNELS);
+TF_CALL_variant(REGISTER_KERNELS);
#undef REGISTER_KERNELS
#if GOOGLE_CUDA
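
The Variant specialization of AssignVariableOp above copies the value with a TensorProto serialization roundtrip rather than a device memcpy, which is why it is registered only for CPU; the TODO notes that a proper Variant deep-copy registry should eventually replace this. A minimal standalone sketch of that copy step, assuming the standard Tensor/TensorProto headers (the helper name is illustrative, not part of this commit):

    #include "tensorflow/core/framework/tensor.h"
    #include "tensorflow/core/framework/tensor.pb.h"

    namespace tensorflow {

    // Copy a DT_VARIANT tensor on the host by serializing it to a TensorProto
    // and parsing it back into a fresh tensor. Returns false if parsing fails.
    bool CopyVariantViaProtoRoundtrip(const Tensor& value, Tensor* out) {
      TensorProto proto;
      value.AsProtoTensorContent(&proto);  // serialize values, including Variants
      return out->FromProto(proto);        // rebuild the tensor from the proto
    }

    }  // namespace tensorflow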
diff --git a/tensorflow/core/kernels/shape_op_test.cc b/tensorflow/core/kernels/shape_op_test.cc
index 96eaa4ac75..a545fb146c 100644
--- a/tensorflow/core/kernels/shape_op_test.cc
+++ b/tensorflow/core/kernels/shape_op_test.cc
@@ -68,7 +68,9 @@ static void ExpectHasError(const Status& s, const string& substr) {
}
TEST_F(ShapeOpTest, Simple) {
- Scope root = Scope::NewRootScope();
+ // Ensure the ops run on CPU, as we have no device copy registration
+ // for NoKnownShape and KnownVecSize objects.
+ Scope root = Scope::NewRootScope().WithDevice("/cpu:0");
// Use a placeholder so the graph optimizer doesn't optimize away
// the shape function.
diff --git a/tensorflow/core/kernels/shape_ops.cc b/tensorflow/core/kernels/shape_ops.cc
index 98cd208576..721f9b949b 100644
--- a/tensorflow/core/kernels/shape_ops.cc
+++ b/tensorflow/core/kernels/shape_ops.cc
@@ -84,6 +84,7 @@ REGISTER_KERNEL_BUILDER(Name("Shape")
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
TF_CALL_bool(REGISTER_GPU_KERNEL);
+TF_CALL_variant(REGISTER_GPU_KERNEL);
#undef REGISTER_GPU_KERNEL
// A special GPU kernel for int32.
@@ -104,23 +105,6 @@ REGISTER_KERNEL_BUILDER(Name("Shape")
.TypeConstraint<int64>("out_type"),
ShapeOp<int64>);
-// TODO(ebrevdo): Once rendezvous has been properly set up for
-// Variants, we'll no longer need a HostMemory attribute for this case.
-REGISTER_KERNEL_BUILDER(Name("Shape")
- .Device(DEVICE_GPU)
- .HostMemory("input")
- .HostMemory("output")
- .TypeConstraint<Variant>("T")
- .TypeConstraint<int32>("out_type"),
- ShapeOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("Shape")
- .Device(DEVICE_GPU)
- .HostMemory("input")
- .HostMemory("output")
- .TypeConstraint<Variant>("T")
- .TypeConstraint<int64>("out_type"),
- ShapeOp<int64>);
-
#endif // GOOGLE_CUDA
// ShapeN ---------------------------------------
@@ -245,6 +229,7 @@ REGISTER_KERNEL_BUILDER(Name("Rank")
.HostMemory("output"), \
RankOp);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
+TF_CALL_variant(REGISTER_GPU_KERNEL);
#undef REGISTER_GPU_KERNEL
// A special GPU kernel for int32 and bool.
@@ -264,14 +249,6 @@ REGISTER_KERNEL_BUILDER(Name("Rank")
.HostMemory("output"),
RankOp);
-// TODO(ebrevdo): Once rendezvous has been properly set up for
-// Variants, we'll no longer need a HostMemory attribute for this case.
-REGISTER_KERNEL_BUILDER(Name("Rank")
- .Device(DEVICE_GPU)
- .TypeConstraint<Variant>("T")
- .HostMemory("input")
- .HostMemory("output"),
- RankOp);
#endif // GOOGLE_CUDA
// Size ------------------------------------------
@@ -302,6 +279,7 @@ REGISTER_KERNEL_BUILDER(Name("Size")
SizeOp<int64>);
TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
TF_CALL_bool(REGISTER_GPU_KERNEL);
+TF_CALL_variant(REGISTER_GPU_KERNEL);
#undef REGISTER_GPU_KERNEL
// A special GPU kernel for int32.
@@ -322,22 +300,6 @@ REGISTER_KERNEL_BUILDER(Name("Size")
.HostMemory("output"),
SizeOp<int64>);
-// TODO(ebrevdo): Once rendezvous has been properly set up for
-// Variants, we'll no longer need a HostMemory attribute for this case.
-REGISTER_KERNEL_BUILDER(Name("Size")
- .Device(DEVICE_GPU)
- .TypeConstraint<Variant>("T")
- .TypeConstraint<int32>("out_type")
- .HostMemory("input")
- .HostMemory("output"),
- SizeOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("Size")
- .Device(DEVICE_GPU)
- .TypeConstraint<Variant>("T")
- .TypeConstraint<int64>("out_type")
- .HostMemory("input")
- .HostMemory("output"),
- SizeOp<int64>);
#endif // GOOGLE_CUDA
#ifdef TENSORFLOW_USE_SYCL
diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py
index f54bbfe90e..65ec2d4b77 100644
--- a/tensorflow/python/ops/state_ops.py
+++ b/tensorflow/python/ops/state_ops.py
@@ -124,7 +124,7 @@ def variable_op_v2(shape, dtype, name="Variable", container="", shared_name=""):
with this shared_name. Otherwise, the node name is used instead.
Returns:
- A variable tensor.1;5A
+ A variable tensor.
"""
return gen_state_ops._variable_v2(shape=shape,
dtype=dtype,