author     2017-10-11 11:01:29 -0700
committer  2017-10-11 11:05:35 -0700
commit     4a139397e8f4b3cbd50240cfd914bac9db476965 (patch)
tree       cfdae90415e50478f4788fbfda88bca4ffe6c6c1
parent     4d69d0408da946096163ee1d8ea068ae6698ae9d (diff)
More Variant cross-device support:
* Remove HostConstraint for ops taking Variants; they can now be copied from/to Device.
* Add ResourceVariable assign operations that support variants.
PiperOrigin-RevId: 171845029
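
For context on the first bullet: until this change, GPU kernels taking DT_VARIANT inputs pinned those inputs and outputs to host memory (the "HostConstraint"), because Variant tensors could not be copied between devices. The Conj registration from this diff shows the before/after pattern; the same change is applied to AddN, ZerosLike, Shape, Rank, and Size below.

// Before: Variant input/output pinned to host memory, since Variants
// had no cross-device copy support.
REGISTER_KERNEL_BUILDER(Name("Conj")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<Variant>("T")
                            .HostMemory("input")
                            .HostMemory("output"),
                        UnaryVariantOp<GPUDevice, CONJ_VARIANT_UNARY_OP>);

// After: no HostMemory constraints; the Variant tensor may be
// device-resident and is copied from/to the device as needed.
REGISTER_KERNEL_BUILDER(
    Name("Conj").Device(DEVICE_GPU).TypeConstraint<Variant>("T"),
    UnaryVariantOp<GPUDevice, CONJ_VARIANT_UNARY_OP>);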
-rw-r--r--  tensorflow/core/framework/register_types.h        |  4
-rw-r--r--  tensorflow/core/kernels/aggregate_ops.cc           | 10
-rw-r--r--  tensorflow/core/kernels/constant_op.cc             |  9
-rw-r--r--  tensorflow/core/kernels/cwise_op_conj.cc           | 11
-rw-r--r--  tensorflow/core/kernels/resource_variable_ops.cc   | 53
-rw-r--r--  tensorflow/core/kernels/shape_op_test.cc           |  4
-rw-r--r--  tensorflow/core/kernels/shape_ops.cc               | 44
-rw-r--r--  tensorflow/python/ops/state_ops.py                 |  2
8 files changed, 68 insertions, 69 deletions
diff --git a/tensorflow/core/framework/register_types.h b/tensorflow/core/framework/register_types.h
index 3f9c307d03..61e722e57b 100644
--- a/tensorflow/core/framework/register_types.h
+++ b/tensorflow/core/framework/register_types.h
@@ -19,6 +19,7 @@ limitations under the License.
 #include "tensorflow/core/framework/numeric_types.h"
 #include "tensorflow/core/framework/resource_handle.h"
+#include "tensorflow/core/framework/variant.h"
 #include "tensorflow/core/platform/types.h"
 
 // Two sets of macros:
@@ -67,6 +68,7 @@ limitations under the License.
 #define TF_CALL_int8(m) m(::tensorflow::int8)
 #define TF_CALL_string(m) m(string)
 #define TF_CALL_resource(m) m(::tensorflow::ResourceHandle)
+#define TF_CALL_variant(m) m(::tensorflow::Variant)
 #define TF_CALL_complex64(m) m(::tensorflow::complex64)
 #define TF_CALL_int64(m) m(::tensorflow::int64)
 #define TF_CALL_uint64(m) m(::tensorflow::uint64)
@@ -96,6 +98,7 @@ limitations under the License.
 #define TF_CALL_int8(m)
 #define TF_CALL_string(m)
 #define TF_CALL_resource(m)
+#define TF_CALL_variant(m)
 #define TF_CALL_complex64(m)
 #define TF_CALL_int64(m) m(::tensorflow::int64)
 #define TF_CALL_uint64(m)
@@ -125,6 +128,7 @@ limitations under the License.
 #define TF_CALL_int8(m)
 #define TF_CALL_string(m)
 #define TF_CALL_resource(m)
+#define TF_CALL_variant(m)
 #define TF_CALL_complex64(m)
 #define TF_CALL_int64(m)
 #define TF_CALL_uint64(m)
diff --git a/tensorflow/core/kernels/aggregate_ops.cc b/tensorflow/core/kernels/aggregate_ops.cc
index 0099984f69..2f125312d0 100644
--- a/tensorflow/core/kernels/aggregate_ops.cc
+++ b/tensorflow/core/kernels/aggregate_ops.cc
@@ -229,6 +229,7 @@ REGISTER_ADDN_CPU(Variant);
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_ADDN_GPU);
 TF_CALL_complex64(REGISTER_ADDN_GPU);
 TF_CALL_complex128(REGISTER_ADDN_GPU);
+TF_CALL_variant(REGISTER_ADDN_GPU);
 #undef REGISTER_ADDN_GPU
 
 // A special GPU kernel for int32.
@@ -241,15 +242,6 @@ REGISTER_KERNEL_BUILDER(Name("AddN")
                             .HostMemory("sum"),
                         AddNOp<CPUDevice, int32>);
 
-// TODO(ebrevdo): Once rendezvous has been properly set up for
-// Variants, we'll no longer need a HostMemory attribute for this case.
-REGISTER_KERNEL_BUILDER(Name("AddN")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<Variant>("T")
-                            .HostMemory("inputs")
-                            .HostMemory("sum"),
-                        AddNOp<GPUDevice, Variant>);
-
 #endif  // GOOGLE_CUDA
 
 #ifdef TENSORFLOW_USE_SYCL
diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc
index 018ace5485..72132574a4 100644
--- a/tensorflow/core/kernels/constant_op.cc
+++ b/tensorflow/core/kernels/constant_op.cc
@@ -333,19 +333,12 @@ REGISTER_KERNEL(double, GPU);
 REGISTER_KERNEL(complex64, GPU);
 REGISTER_KERNEL(complex128, GPU);
 REGISTER_KERNEL(int64, GPU);
+REGISTER_KERNEL(Variant, GPU);
 REGISTER_KERNEL_BUILDER(Name("ZerosLike")
                             .Device(DEVICE_GPU)
                             .TypeConstraint<int32>("T")
                             .HostMemory("y"),
                         ZerosLikeOp<CPUDevice, int32>);
-// TODO(ebrevdo): Once rendezvous has been properly set up for
-// Variants, we'll no longer need a HostMemory attribute for this case.
-REGISTER_KERNEL_BUILDER(Name("ZerosLike")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<Variant>("T")
-                            .HostMemory("x")
-                            .HostMemory("y"),
-                        ZerosLikeOp<GPUDevice, Variant>);
 #endif  // GOOGLE_CUDA
 
 #undef REGISTER_KERNEL
diff --git a/tensorflow/core/kernels/cwise_op_conj.cc b/tensorflow/core/kernels/cwise_op_conj.cc
index 2ab8c42c53..929c54a9a1 100644
--- a/tensorflow/core/kernels/cwise_op_conj.cc
+++ b/tensorflow/core/kernels/cwise_op_conj.cc
@@ -23,14 +23,9 @@ REGISTER2(UnaryOp, CPU, "Conj", functor::conj, complex64, complex128);
 REGISTER_VARIANT(UnaryVariantOp, CPU, "Conj", CONJ_VARIANT_UNARY_OP);
 
 #if GOOGLE_CUDA
-// TODO(ebrevdo): Once rendezvous has been properly set up for
-// Variants, we'll no longer need a HostMemory attribute for this case.
-REGISTER_KERNEL_BUILDER(Name("Conj")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<Variant>("T")
-                            .HostMemory("input")
-                            .HostMemory("output"),
-                        UnaryVariantOp<GPUDevice, CONJ_VARIANT_UNARY_OP>);
+REGISTER_KERNEL_BUILDER(
+    Name("Conj").Device(DEVICE_GPU).TypeConstraint<Variant>("T"),
+    UnaryVariantOp<GPUDevice, CONJ_VARIANT_UNARY_OP>);
 REGISTER_KERNEL_BUILDER(
     Name("Conj").Device(DEVICE_GPU).TypeConstraint<complex64>("T"),
     UnaryOp<GPUDevice, functor::conj<complex64>>);
diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc
index e45abb6c56..3cca493972 100644
--- a/tensorflow/core/kernels/resource_variable_ops.cc
+++ b/tensorflow/core/kernels/resource_variable_ops.cc
@@ -127,7 +127,7 @@ REGISTER_KERNEL_BUILDER(
           .Device(DEVICE_GPU)                   \
           .HostMemory("resource")               \
           .TypeConstraint<type>("dtype"),       \
-      ResourceHandleOp<Var>)                    \
+      ResourceHandleOp<Var>)
 
 TF_CALL_GPU_ALL_TYPES(REGISTER_GPU_KERNELS);
 #undef REGISTER_GPU_KERNELS
@@ -272,6 +272,56 @@ class AssignVariableOp : public OpKernel {
   DataType dtype_;
 };
 
+template <typename Device>
+class AssignVariableOp<Device, Variant> : public OpKernel {
+ public:
+  explicit AssignVariableOp(OpKernelConstruction* c) : OpKernel(c) {
+    OP_REQUIRES_OK(c, c->GetAttr("dtype", &dtype_));
+    OP_REQUIRES(c, dtype_ == DT_VARIANT,
+                errors::Internal("Variant kernel called with dtype: ",
+                                 DataTypeString(dtype_)));
+  }
+
+  void Compute(OpKernelContext* context) override {
+    const Tensor& value = context->input(1);
+    OP_REQUIRES(context, dtype_ == value.dtype(),
+                errors::InvalidArgument(
+                    "Variable and value dtypes don't match; respectively, ",
+                    dtype_, " and ", context->input(1).dtype()));
+
+    Var* variable = nullptr;
+    OP_REQUIRES_OK(context, LookupOrCreateResource<Var>(
+                                context, HandleFromInput(context, 0),
+                                &variable, [this, context](Var** ptr) {
+                                  *ptr = new Var(dtype_);
+                                  // Create an empty new Variant tensor.
+                                  return Status::OK();
+                                }));
+    core::ScopedUnref s(variable);
+
+    OP_REQUIRES(context, variable->tensor()->dtype() == DT_VARIANT,
+                errors::InvalidArgument(
+                    "Trying to assign variable with wrong dtype. Expected ",
+                    DataTypeString(variable->tensor()->dtype()), " got ",
+                    DataTypeString(DT_VARIANT)));
+
+    mutex_lock ml(*variable->mu());
+    // TODO(ebrevdo): Add a proper Variant deep copy / assign registry
+    // entry and use that here.  For now, use a serialization
+    // roundtrip to perform the copy on CPU.  This is OK because this
+    // op is not registered for GPU.
+    *variable->tensor() = Tensor();
+    TensorProto tmp;
+    value.AsProtoTensorContent(&tmp);
+    OP_REQUIRES(context, variable->tensor()->FromProto(tmp),
+                errors::Internal("Could not properly reserialize values "
+                                 "Variant.  Check logs for more details."));
+  }
+
+ private:
+  DataType dtype_;
+};
+
 #define REGISTER_KERNELS(type)                     \
   REGISTER_KERNEL_BUILDER(Name("AssignVariableOp") \
                               .Device(DEVICE_CPU)  \
@@ -280,6 +330,7 @@ class AssignVariableOp : public OpKernel {
 
 TF_CALL_ALL_TYPES(REGISTER_KERNELS);
 TF_CALL_QUANTIZED_TYPES(REGISTER_KERNELS);
+TF_CALL_variant(REGISTER_KERNELS);
 #undef REGISTER_KERNELS
 
 #if GOOGLE_CUDA
diff --git a/tensorflow/core/kernels/shape_op_test.cc b/tensorflow/core/kernels/shape_op_test.cc
index 96eaa4ac75..a545fb146c 100644
--- a/tensorflow/core/kernels/shape_op_test.cc
+++ b/tensorflow/core/kernels/shape_op_test.cc
@@ -68,7 +68,9 @@ static void ExpectHasError(const Status& s, const string& substr) {
 }
 
 TEST_F(ShapeOpTest, Simple) {
-  Scope root = Scope::NewRootScope();
+  // Ensure the ops run on CPU, as we have no device copy registration
+  // for NoKnownShape and KnownVecSize objects.
+  Scope root = Scope::NewRootScope().WithDevice("/cpu:0");
 
   // Use a placeholder so the graph optimizer doesn't optimize away
   // the shape function.
diff --git a/tensorflow/core/kernels/shape_ops.cc b/tensorflow/core/kernels/shape_ops.cc
index 98cd208576..721f9b949b 100644
--- a/tensorflow/core/kernels/shape_ops.cc
+++ b/tensorflow/core/kernels/shape_ops.cc
@@ -84,6 +84,7 @@ REGISTER_KERNEL_BUILDER(Name("Shape")
 
 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
 TF_CALL_bool(REGISTER_GPU_KERNEL);
+TF_CALL_variant(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 
 // A special GPU kernel for int32.
@@ -104,23 +105,6 @@ REGISTER_KERNEL_BUILDER(Name("Shape")
                             .TypeConstraint<int64>("out_type"),
                         ShapeOp<int64>);
 
-// TODO(ebrevdo): Once rendezvous has been properly set up for
-// Variants, we'll no longer need a HostMemory attribute for this case.
-REGISTER_KERNEL_BUILDER(Name("Shape")
-                            .Device(DEVICE_GPU)
-                            .HostMemory("input")
-                            .HostMemory("output")
-                            .TypeConstraint<Variant>("T")
-                            .TypeConstraint<int32>("out_type"),
-                        ShapeOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("Shape")
-                            .Device(DEVICE_GPU)
-                            .HostMemory("input")
-                            .HostMemory("output")
-                            .TypeConstraint<Variant>("T")
-                            .TypeConstraint<int64>("out_type"),
-                        ShapeOp<int64>);
-
 #endif  // GOOGLE_CUDA
 
 // ShapeN ---------------------------------------
@@ -245,6 +229,7 @@ REGISTER_KERNEL_BUILDER(Name("Rank")
                             .HostMemory("output"), \
                         RankOp);
 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
+TF_CALL_variant(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 
 // A special GPU kernel for int32 and bool.
@@ -264,14 +249,6 @@ REGISTER_KERNEL_BUILDER(Name("Rank")
                             .HostMemory("output"),
                         RankOp);
 
-// TODO(ebrevdo): Once rendezvous has been properly set up for
-// Variants, we'll no longer need a HostMemory attribute for this case.
-REGISTER_KERNEL_BUILDER(Name("Rank")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<Variant>("T")
-                            .HostMemory("input")
-                            .HostMemory("output"),
-                        RankOp);
 #endif  // GOOGLE_CUDA
 
 // Size ------------------------------------------
@@ -302,6 +279,7 @@ REGISTER_KERNEL_BUILDER(Name("Size")
                         SizeOp<int64>);
 TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER_GPU_KERNEL);
 TF_CALL_bool(REGISTER_GPU_KERNEL);
+TF_CALL_variant(REGISTER_GPU_KERNEL);
 #undef REGISTER_GPU_KERNEL
 
 // A special GPU kernel for int32.
@@ -322,22 +300,6 @@ REGISTER_KERNEL_BUILDER(Name("Size")
                             .HostMemory("output"),
                         SizeOp<int64>);
 
-// TODO(ebrevdo): Once rendezvous has been properly set up for
-// Variants, we'll no longer need a HostMemory attribute for this case.
-REGISTER_KERNEL_BUILDER(Name("Size")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<Variant>("T")
-                            .TypeConstraint<int32>("out_type")
-                            .HostMemory("input")
-                            .HostMemory("output"),
-                        SizeOp<int32>);
-REGISTER_KERNEL_BUILDER(Name("Size")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<Variant>("T")
-                            .TypeConstraint<int64>("out_type")
-                            .HostMemory("input")
-                            .HostMemory("output"),
-                        SizeOp<int64>);
 #endif  // GOOGLE_CUDA
 
 #ifdef TENSORFLOW_USE_SYCL
diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py
index f54bbfe90e..65ec2d4b77 100644
--- a/tensorflow/python/ops/state_ops.py
+++ b/tensorflow/python/ops/state_ops.py
@@ -124,7 +124,7 @@ def variable_op_v2(shape, dtype, name="Variable", container="", shared_name=""):
       with this shared_name. Otherwise, the node name is used instead.
 
   Returns:
-    A variable tensor.1;5A
+    A variable tensor.
   """
   return gen_state_ops._variable_v2(shape=shape,
                                     dtype=dtype,