author    Eugene Brevdo <ebrevdo@google.com>              2018-04-06 21:00:42 -0700
committer TensorFlower Gardener <gardener@tensorflow.org> 2018-04-06 21:03:10 -0700
commit    273495dc2c957402f832cae31a438e550db2b7f0
tree      98691c91e0af5a5a7464ca0f2645b434160710fb /tensorflow/core/kernels/resource_variable_ops.cc
parent    7f97f1bf69765be51b9f79f5134eb44736d216eb
Improvements to ResourceVariable + Variant code.

* Works in graph + eager modes.
* Fixed shape inference.
* Updated shape inference + refiner + constant eval code to support a
  static shape tensor entry of `-1` meaning unknown shape.
* Gather and Scatter for Variants are now properly supported.
* Variable copy-on-write for Variants now does a shallower copy (as
  Variants are not expected to be updated "in-place" inside a variable;
  instead, Variants will be updated via read-update-write inside a
  CriticalSection).

PiperOrigin-RevId: 191975898
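A note on the `-1` convention mentioned above: TensorFlow already uses -1 as the unknown-dimension marker in partial shapes, and this commit extends that convention to constant shape tensors seen by the shape refiner. A minimal sketch of the marker itself (illustrative only, not part of this commit):

#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/platform/logging.h"

void UnknownDimSketch() {
  // -1 marks an unknown extent, so the shape is only partially defined.
  tensorflow::PartialTensorShape shape({-1, 3});
  CHECK(!shape.IsFullyDefined());  // dim 0 is unknown
  CHECK_EQ(shape.dim_size(1), 3);  // dim 1 is known
}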
Diffstat (limited to 'tensorflow/core/kernels/resource_variable_ops.cc')
-rw-r--r--  tensorflow/core/kernels/resource_variable_ops.cc | 118
1 file changed, 53 insertions(+), 65 deletions(-)
diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc
index f49a05c70a..72504200cc 100644
--- a/tensorflow/core/kernels/resource_variable_ops.cc
+++ b/tensorflow/core/kernels/resource_variable_ops.cc
@@ -280,64 +280,6 @@ class AssignVariableOp : public OpKernel {
};
template <typename Device>
-Status VariantCopyFn(OpKernelContext* context, const Tensor& from, Tensor* to);
-
-#define CPU_DENSE_COPY(T) \
- case DataTypeToEnum<T>::value: { \
- functor::DenseUpdate<CPUDevice, T, ASSIGN> copy_functor_; \
- copy_functor_(context->eigen_device<CPUDevice>(), tensor->flat<T>(), \
- from.flat<T>()); \
- break; \
- }
-
-#define INSTANTIATE_GET_VARIANT_COPY_FN(Device, TYPE_CALLER, TYPE_DENSE_COPY) \
- template <> \
- Status VariantCopyFn<Device>(OpKernelContext * context, const Tensor& from, \
- Tensor* to) { \
- PersistentTensor tmp; \
- Tensor* tensor; \
- AllocatorAttributes attr; \
- attr.set_gpu_compatible(true); \
- attr.set_nic_compatible(true); \
- TF_RETURN_IF_ERROR(context->allocate_persistent( \
- from.dtype(), from.shape(), &tmp, &tensor, attr)); \
- switch (from.dtype()) { \
- TYPE_CALLER(TYPE_DENSE_COPY); \
- default: \
- return errors::InvalidArgument( \
- "VariantCopyFn: Could not perform a deep copy of variant " \
- "element of type: ", \
- DataTypeString(from.dtype()), \
- " using device: ", context->device()->name()); \
- } \
- *to = *tensor; \
- return Status::OK(); \
- }
-
-INSTANTIATE_GET_VARIANT_COPY_FN(CPUDevice, TF_CALL_ALL_TYPES, CPU_DENSE_COPY);
-
-#if GOOGLE_CUDA
-#define GPU_DENSE_COPY(T) \
- case DataTypeToEnum<T>::value: { \
- functor::DenseUpdate<GPUDevice, T, ASSIGN> copy_functor_; \
- copy_functor_(context->eigen_device<GPUDevice>(), tensor->flat<T>(), \
- from.flat<T>()); \
- break; \
- }
-#define TF_CALL_GPU_AND_ADDITIONAL_TYPES(T) \
- TF_CALL_GPU_ALL_TYPES(T); \
- TF_CALL_int32(T); \
- TF_CALL_int64(T);
-INSTANTIATE_GET_VARIANT_COPY_FN(GPUDevice, TF_CALL_GPU_AND_ADDITIONAL_TYPES,
- GPU_DENSE_COPY);
-#undef TF_CALL_GPU_AND_ADDITIONAL_TYPES
-#undef GPU_DENSE_COPY
-#endif // GOOGLE_CUDA
-
-#undef CPU_DENSE_COPY
-#undef INSTANTIATE_GET_VARIANT_COPY_FN
-
-template <typename Device>
class AssignVariableOp<Device, Variant> : public OpKernel {
public:
explicit AssignVariableOp(OpKernelConstruction* c) : OpKernel(c) {
@@ -370,9 +312,16 @@ class AssignVariableOp<Device, Variant> : public OpKernel {
// Copying is unnecessary if we are the last user of the value
// tensor, we can just adopt the input tensor's buffer instead.
// Note that Variant objects themselves always reside on host.
+ //
+ // We nevertheless want to signal to the runtime that the tensor
+ // should reside in memory of the associated device, as Variant
+ // tensors may be marked as sitting on either CPU or GPU. This
+ // helps to elide one or more copies.
std::unique_ptr<Tensor> input_alias = context->forward_input(
1, OpKernelContext::Params::kNoReservation /*output_index*/, DT_VARIANT,
- value.shape(), HOST_MEMORY, attr);
+ value.shape(),
+ std::is_same<Device, CPUDevice>::value ? HOST_MEMORY : DEVICE_MEMORY,
+ attr);
mutex_lock ml(*variable->mu());
variable->is_initialized = true;
@@ -396,12 +345,8 @@ class AssignVariableOp<Device, Variant> : public OpKernel {
const auto elements_in = value.flat<Variant>();
auto elements_out = variable->tensor()->flat<Variant>();
- auto copy_fn = std::bind(&VariantCopyFn<Device>, context,
- std::placeholders::_1, std::placeholders::_2);
for (int64 i = 0; i < elements_in.size(); ++i) {
- OP_REQUIRES_OK(context, VariantDeviceCopy(
- VariantDeviceCopyDirection::DEVICE_TO_DEVICE,
- elements_in(i), &elements_out(i), copy_fn));
+ elements_out(i) = elements_in(i);
}
}
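The loop above replaces the deleted VariantCopyFn machinery with plain wrapper assignment. A minimal sketch of why that assignment is shallow, under the assumption that the stored Variant wraps a Tensor (illustrative, not part of this commit): copying a Variant runs the payload's copy constructor, and Tensor's copy constructor shares the underlying buffer, which is safe given the read-update-write discipline described in the commit message.

#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/variant.h"

void ShallowVariantCopySketch() {
  tensorflow::Tensor t(tensorflow::DT_FLOAT, tensorflow::TensorShape({2}));
  tensorflow::Variant in = t;  // wrap the tensor in a Variant
  tensorflow::Variant out;
  out = in;  // wrapper copy; the wrapped Tensor still shares t's buffer
}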
@@ -560,7 +505,14 @@ class ResourceGatherOp : public OpKernel {
}
Tensor* out = nullptr;
- OP_REQUIRES_OK(c, c->allocate_output(0, result_shape, &out));
+ Tensor tmp;
+ if (params.dtype() == DT_VARIANT) {
+ tmp = Tensor(DT_VARIANT, result_shape);
+ c->set_output(0, tmp);
+ out = &tmp;
+ } else {
+ OP_REQUIRES_OK(c, c->allocate_output(0, result_shape, &out));
+ }
if (N > 0) {
const int64 gather_dim_size = params.dim_size(0);
int64 inner_size = 1;
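On the allocation change in the hunk above: c->allocate_output() would hand back device memory for a GPU kernel, whereas a directly constructed Tensor uses the default CPU allocator, keeping the DT_VARIANT buffer host-resident; set_output() then publishes it as-is. A sketch of the host-side path (illustrative only, not part of this commit):

#include "tensorflow/core/framework/tensor.h"

// A Tensor built this way is allocated with the default CPU allocator,
// so its DT_VARIANT elements stay on the host even when the kernel
// itself is registered for DEVICE_GPU.
tensorflow::Tensor MakeHostVariantTensor(tensorflow::int64 n) {
  return tensorflow::Tensor(tensorflow::DT_VARIANT,
                            tensorflow::TensorShape({n}));
}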
@@ -607,6 +559,23 @@ TF_CALL_QUANTIZED_TYPES(REGISTER_GATHER_CPU);
TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_GPU);
+// Variant objects themselves sit on CPU, even if they contain data
+// pointing to a device.
+REGISTER_KERNEL_BUILDER(Name("ResourceGather")
+ .Device(DEVICE_GPU)
+ .HostMemory("resource")
+ .HostMemory("indices")
+ .TypeConstraint<Variant>("dtype")
+ .TypeConstraint<int32>("Tindices"),
+ ResourceGatherOp<GPUDevice, Variant, int32>)
+REGISTER_KERNEL_BUILDER(Name("ResourceGather")
+ .Device(DEVICE_GPU)
+ .HostMemory("resource")
+ .HostMemory("indices")
+ .TypeConstraint<Variant>("dtype")
+ .TypeConstraint<int64>("Tindices"),
+ ResourceGatherOp<GPUDevice, Variant, int64>)
+
#endif // GOOGLE_CUDA
#undef REGISTER_GATHER_CPU
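Because the registrations above pin "resource" and "indices" to host memory, and the Variant wrappers themselves sit on CPU, the gather can proceed as wrapper assignment on the host. A rough sketch of that idea, ignoring the inner dimensions the real kernel handles (hypothetical helper, not the kernel's code):

#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/variant.h"

// Hypothetical 1-D helper: gathers Variant wrappers by index. Each
// assignment is a shallow wrapper copy, as in AssignVariableOp above.
void GatherVariantsSketch(const tensorflow::Tensor& params,
                          const tensorflow::Tensor& indices,
                          tensorflow::Tensor* out) {
  auto src = params.flat<tensorflow::Variant>();
  auto idx = indices.flat<tensorflow::int32>();
  auto dst = out->flat<tensorflow::Variant>();
  for (tensorflow::int64 i = 0; i < idx.size(); ++i) {
    dst(i) = src(idx(i));
  }
}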
@@ -721,6 +690,8 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_SCATTER_MINMAX_CPU);
REGISTER_SCATTER_KERNEL(string, CPU, "ResourceScatterUpdate",
scatter_op::UpdateOp::ASSIGN);
+REGISTER_SCATTER_KERNEL(Variant, CPU, "ResourceScatterUpdate",
+ scatter_op::UpdateOp::ASSIGN);
// Registers GPU kernels.
#if GOOGLE_CUDA
@@ -733,6 +704,23 @@ REGISTER_SCATTER_KERNEL(string, CPU, "ResourceScatterUpdate",
TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ARITHMETIC_GPU);
TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_MINMAX_GPU);
+REGISTER_KERNEL_BUILDER(Name("ResourceScatterUpdate")
+ .Device(DEVICE_GPU)
+ .HostMemory("resource")
+ .HostMemory("indices")
+ .TypeConstraint<Variant>("dtype")
+ .TypeConstraint<int32>("Tindices"),
+ ResourceScatterUpdateOp<GPUDevice, Variant, int32,
+ scatter_op::UpdateOp::ASSIGN>)
+REGISTER_KERNEL_BUILDER(Name("ResourceScatterUpdate")
+ .Device(DEVICE_GPU)
+ .HostMemory("resource")
+ .HostMemory("indices")
+ .TypeConstraint<Variant>("dtype")
+ .TypeConstraint<int64>("Tindices"),
+ ResourceScatterUpdateOp<GPUDevice, Variant, int64,
+ scatter_op::UpdateOp::ASSIGN>)
+
#endif // GOOGLE_CUDA
#undef REGISTER_SCATTER_ARITHMETIC