| author | Eugene Brevdo <ebrevdo@google.com> | 2018-04-06 21:00:42 -0700 |
|---|---|---|
| committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-04-06 21:03:10 -0700 |
| commit | 273495dc2c957402f832cae31a438e550db2b7f0 (patch) | |
| tree | 98691c91e0af5a5a7464ca0f2645b434160710fb /tensorflow/core/kernels/resource_variable_ops.cc | |
| parent | 7f97f1bf69765be51b9f79f5134eb44736d216eb (diff) | |
Improvements to ResourceVariable + Variant code.
* Works in graph + eager modes
* Fixed shape inference
* Updated shape inference + refiner + constant eval code to support a static shape tensor value of `-1` meaning an unknown dimension (the convention is illustrated in the first sketch after this message).
* Gather and Scatter for Variants now properly supported.
* Variable copy-on-write for Variants now performs a shallower copy, since Variants are not expected to be updated "in-place" inside a variable; instead, Variants are updated via read-update-write inside a CriticalSection (see the second sketch after this message).
PiperOrigin-RevId: 191975898
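
To make the `-1` convention from the shape-inference bullet concrete, here is a minimal, self-contained C++ sketch. It is not the TensorFlow shape-inference API; `ShapeToString` is a hypothetical helper, and the only point is that a `-1` entry in a static shape vector stands for an unknown dimension.

```cpp
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// Hypothetical helper: render a static shape vector, mapping -1 ("unknown
// dimension" per the commit's convention) to "?".
std::string ShapeToString(const std::vector<int64_t>& dims) {
  std::string out = "[";
  for (std::size_t i = 0; i < dims.size(); ++i) {
    if (i > 0) out += ", ";
    out += (dims[i] == -1) ? "?" : std::to_string(dims[i]);
  }
  return out + "]";
}

int main() {
  // A rank-3 shape whose middle dimension is statically unknown.
  std::vector<int64_t> shape = {32, -1, 128};
  std::cout << ShapeToString(shape) << "\n";  // prints [32, ?, 128]
  return 0;
}
```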
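And a second sketch of why the shallower copy is safe under the discipline the commit describes: if Variants are never mutated in place but only replaced via read-update-write under a lock, sharing the payload on copy cannot be observed by readers. `VariantVar` and `Payload` are hypothetical stand-ins for a variable's Variant element and a CriticalSection-style lock, not TensorFlow types.

```cpp
#include <iostream>
#include <memory>
#include <mutex>
#include <string>

struct Payload { std::string data; };

class VariantVar {
 public:
  // Shallow copy-on-assign: share the payload rather than deep-copying it.
  void Assign(std::shared_ptr<const Payload> p) {
    std::lock_guard<std::mutex> lk(mu_);
    payload_ = std::move(p);
  }
  // Read-update-write under the lock: build a fresh payload and swap it in,
  // never mutating the shared one in place.
  void Update(const std::string& suffix) {
    std::lock_guard<std::mutex> lk(mu_);
    auto next = std::make_shared<Payload>(*payload_);
    next->data += suffix;
    payload_ = std::move(next);  // readers holding the old payload see no change
  }
  std::shared_ptr<const Payload> Read() const {
    std::lock_guard<std::mutex> lk(mu_);
    return payload_;
  }

 private:
  mutable std::mutex mu_;
  std::shared_ptr<const Payload> payload_ = std::make_shared<Payload>();
};

int main() {
  VariantVar v;
  v.Assign(std::make_shared<const Payload>(Payload{"a"}));
  auto snapshot = v.Read();  // shallow "copy" taken by a reader
  v.Update("b");             // writer swaps in a new payload
  std::cout << snapshot->data << " -> " << v.Read()->data << "\n";  // a -> ab
  return 0;
}
```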
Diffstat (limited to 'tensorflow/core/kernels/resource_variable_ops.cc')
| -rw-r--r-- | tensorflow/core/kernels/resource_variable_ops.cc | 118 |
1 file changed, 53 insertions, 65 deletions
```diff
diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc
index f49a05c70a..72504200cc 100644
--- a/tensorflow/core/kernels/resource_variable_ops.cc
+++ b/tensorflow/core/kernels/resource_variable_ops.cc
@@ -280,64 +280,6 @@ class AssignVariableOp : public OpKernel {
 };
 
 template <typename Device>
-Status VariantCopyFn(OpKernelContext* context, const Tensor& from, Tensor* to);
-
-#define CPU_DENSE_COPY(T)                                                 \
-  case DataTypeToEnum<T>::value: {                                        \
-    functor::DenseUpdate<CPUDevice, T, ASSIGN> copy_functor_;             \
-    copy_functor_(context->eigen_device<CPUDevice>(), tensor->flat<T>(),  \
-                  from.flat<T>());                                        \
-    break;                                                                \
-  }
-
-#define INSTANTIATE_GET_VARIANT_COPY_FN(Device, TYPE_CALLER, TYPE_DENSE_COPY) \
-  template <>                                                                 \
-  Status VariantCopyFn<Device>(OpKernelContext * context, const Tensor& from, \
-                               Tensor* to) {                                  \
-    PersistentTensor tmp;                                                     \
-    Tensor* tensor;                                                           \
-    AllocatorAttributes attr;                                                 \
-    attr.set_gpu_compatible(true);                                            \
-    attr.set_nic_compatible(true);                                            \
-    TF_RETURN_IF_ERROR(context->allocate_persistent(                          \
-        from.dtype(), from.shape(), &tmp, &tensor, attr));                    \
-    switch (from.dtype()) {                                                   \
-      TYPE_CALLER(TYPE_DENSE_COPY);                                           \
-      default:                                                                \
-        return errors::InvalidArgument(                                       \
-            "VariantCopyFn: Could not perform a deep copy of variant "        \
-            "element of type: ",                                              \
-            DataTypeString(from.dtype()),                                     \
-            " using device: ", context->device()->name());                    \
-    }                                                                         \
-    *to = *tensor;                                                            \
-    return Status::OK();                                                      \
-  }
-
-INSTANTIATE_GET_VARIANT_COPY_FN(CPUDevice, TF_CALL_ALL_TYPES, CPU_DENSE_COPY);
-
-#if GOOGLE_CUDA
-#define GPU_DENSE_COPY(T)                                                 \
-  case DataTypeToEnum<T>::value: {                                        \
-    functor::DenseUpdate<GPUDevice, T, ASSIGN> copy_functor_;             \
-    copy_functor_(context->eigen_device<GPUDevice>(), tensor->flat<T>(),  \
-                  from.flat<T>());                                        \
-    break;                                                                \
-  }
-#define TF_CALL_GPU_AND_ADDITIONAL_TYPES(T) \
-  TF_CALL_GPU_ALL_TYPES(T);                 \
-  TF_CALL_int32(T);                         \
-  TF_CALL_int64(T);
-INSTANTIATE_GET_VARIANT_COPY_FN(GPUDevice, TF_CALL_GPU_AND_ADDITIONAL_TYPES,
-                                GPU_DENSE_COPY);
-#undef TF_CALL_GPU_AND_ADDITIONAL_TYPES
-#undef GPU_DENSE_COPY
-#endif  // GOOGLE_CUDA
-
-#undef CPU_DENSE_COPY
-#undef INSTANTIATE_GET_VARIANT_COPY_FN
-
-template <typename Device>
 class AssignVariableOp<Device, Variant> : public OpKernel {
  public:
   explicit AssignVariableOp(OpKernelConstruction* c) : OpKernel(c) {
@@ -370,9 +312,16 @@ class AssignVariableOp<Device, Variant> : public OpKernel {
     // Copying is unnecessary if we are the last user of the value
     // tensor, we can just adopt the input tensor's buffer instead.
     // Note that Variant objects themselves always reside on host.
+    //
+    // We nevertheless want to signal to the runtime that the tensor
+    // should reside in memory of the associated device, as Variant
+    // tensors may be marked as sitting on either CPU or GPU.  This
+    // helps to elide one or more copies.
     std::unique_ptr<Tensor> input_alias = context->forward_input(
         1, OpKernelContext::Params::kNoReservation /*output_index*/, DT_VARIANT,
-        value.shape(), HOST_MEMORY, attr);
+        value.shape(),
+        std::is_same<Device, CPUDevice>::value ? HOST_MEMORY : DEVICE_MEMORY,
+        attr);
 
     mutex_lock ml(*variable->mu());
     variable->is_initialized = true;
@@ -396,12 +345,8 @@ class AssignVariableOp<Device, Variant> : public OpKernel {
 
     const auto elements_in = value.flat<Variant>();
     auto elements_out = variable->tensor()->flat<Variant>();
-    auto copy_fn = std::bind(&VariantCopyFn<Device>, context,
-                             std::placeholders::_1, std::placeholders::_2);
     for (int64 i = 0; i < elements_in.size(); ++i) {
-      OP_REQUIRES_OK(context, VariantDeviceCopy(
-                                  VariantDeviceCopyDirection::DEVICE_TO_DEVICE,
-                                  elements_in(i), &elements_out(i), copy_fn));
+      elements_out(i) = elements_in(i);
     }
   }
@@ -560,7 +505,14 @@ class ResourceGatherOp : public OpKernel {
     }
 
     Tensor* out = nullptr;
-    OP_REQUIRES_OK(c, c->allocate_output(0, result_shape, &out));
+    Tensor tmp;
+    if (params.dtype() == DT_VARIANT) {
+      tmp = Tensor(DT_VARIANT, result_shape);
+      c->set_output(0, tmp);
+      out = &tmp;
+    } else {
+      OP_REQUIRES_OK(c, c->allocate_output(0, result_shape, &out));
+    }
     if (N > 0) {
       const int64 gather_dim_size = params.dim_size(0);
       int64 inner_size = 1;
@@ -607,6 +559,23 @@ TF_CALL_QUANTIZED_TYPES(REGISTER_GATHER_CPU);
 
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_GPU);
 
+// Variant objects themselves sit on CPU, even if they contain data
+// pointing to a device.
+REGISTER_KERNEL_BUILDER(Name("ResourceGather")
+                            .Device(DEVICE_GPU)
+                            .HostMemory("resource")
+                            .HostMemory("indices")
+                            .TypeConstraint<Variant>("dtype")
+                            .TypeConstraint<int32>("Tindices"),
+                        ResourceGatherOp<GPUDevice, Variant, int32>)
+REGISTER_KERNEL_BUILDER(Name("ResourceGather")
+                            .Device(DEVICE_GPU)
+                            .HostMemory("resource")
+                            .HostMemory("indices")
+                            .TypeConstraint<Variant>("dtype")
+                            .TypeConstraint<int64>("Tindices"),
+                        ResourceGatherOp<GPUDevice, Variant, int64>)
+
 #endif  // GOOGLE_CUDA
 
 #undef REGISTER_GATHER_CPU
@@ -721,6 +690,8 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_SCATTER_MINMAX_CPU);
 
 REGISTER_SCATTER_KERNEL(string, CPU, "ResourceScatterUpdate",
                         scatter_op::UpdateOp::ASSIGN);
+REGISTER_SCATTER_KERNEL(Variant, CPU, "ResourceScatterUpdate",
+                        scatter_op::UpdateOp::ASSIGN);
 
 // Registers GPU kernels.
 #if GOOGLE_CUDA
@@ -733,6 +704,23 @@ REGISTER_SCATTER_KERNEL(string, CPU, "ResourceScatterUpdate",
 
 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ARITHMETIC_GPU);
 TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_MINMAX_GPU);
 
+REGISTER_KERNEL_BUILDER(Name("ResourceScatterUpdate")
+                            .Device(DEVICE_GPU)
+                            .HostMemory("resource")
+                            .HostMemory("indices")
+                            .TypeConstraint<Variant>("dtype")
+                            .TypeConstraint<int32>("Tindices"),
+                        ResourceScatterUpdateOp<GPUDevice, Variant, int32,
+                                                scatter_op::UpdateOp::ASSIGN>)
+REGISTER_KERNEL_BUILDER(Name("ResourceScatterUpdate")
+                            .Device(DEVICE_GPU)
+                            .HostMemory("resource")
+                            .HostMemory("indices")
+                            .TypeConstraint<Variant>("dtype")
+                            .TypeConstraint<int64>("Tindices"),
+                        ResourceScatterUpdateOp<GPUDevice, Variant, int64,
+                                                scatter_op::UpdateOp::ASSIGN>)
+
 #endif  // GOOGLE_CUDA
 
 #undef REGISTER_SCATTER_ARITHMETIC
```
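One detail worth calling out from the `forward_input` change above: the memory type is now chosen at compile time from the `Device` template parameter. Below is a minimal sketch of that dispatch pattern; the `CPUDevice`/`GPUDevice` structs and the `MemoryType` enum are stand-ins for illustration, not the TensorFlow definitions.

```cpp
#include <iostream>
#include <type_traits>

// Stand-in device tags and memory-type enum (not the TensorFlow types).
struct CPUDevice {};
struct GPUDevice {};

enum MemoryType { HOST_MEMORY, DEVICE_MEMORY };

template <typename Device>
MemoryType VariantMemoryType() {
  // Variant tensors live on the host for CPU kernels; for other devices,
  // signal device-resident memory so the runtime can elide copies.
  return std::is_same<Device, CPUDevice>::value ? HOST_MEMORY : DEVICE_MEMORY;
}

int main() {
  std::cout << VariantMemoryType<CPUDevice>() << "\n";  // 0 (HOST_MEMORY)
  std::cout << VariantMemoryType<GPUDevice>() << "\n";  // 1 (DEVICE_MEMORY)
  return 0;
}
```

Because `std::is_same<...>::value` is a compile-time constant, the compiler can fold the conditional away, so each kernel instantiation bakes in its memory type with no runtime check.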