path: root/tensorflow/python/lib
author    Asim Shankar <ashankar@google.com>  2018-06-15 16:26:35 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>  2018-06-15 16:31:01 -0700
commit 4d8a66c5b29428b709f4f54b566a44902ea8173e (patch)
tree   717b000e9284c2c9ae0710c32df14bc36f86dda7 /tensorflow/python/lib
parent 96100f90a90bb2db905f50617cbb5e7928480667 (diff)
[py_func]: Fix #20021
* EagerPyFunc now validates its assumption that returned tensors are backed by memory on the same device that the EagerPyFunc kernel executed on.
* Make the Python trampolining mechanism ensure that this requirement of the kernel is met.
* Allow tf.contrib.eager.py_func to execute correctly on devices other than CPU and GPU:0.

Prior to this change, tf.contrib.eager.py_func() would copy data from CPU to GPU:0 if necessary, but not the other way around. As a result, the assumptions made by the EagerPyFunc kernel implementation about the placement of returned tensors would be violated.

The test added in py_func_test.py, when executed on a machine with a GPU, will:
- Fail with a segmentation fault (dereferencing GPU memory) without the changes to py_func.cc and script_ops.py
- Fail with an error message with the change to py_func.cc but without the change to script_ops.py
- Pass with the changes to both py_func.cc and script_ops.py

PiperOrigin-RevId: 200792057
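To make the failure mode concrete, here is a minimal sketch (not the actual test from py_func_test.py) of the kind of cross-device call this commit addresses, assuming a TF 1.x build with eager execution enabled and at least one GPU; the function name is hypothetical:

    import tensorflow as tf

    tf.enable_eager_execution()

    def add_one_on_gpu(x):
      # Hypothetical Python function: it deliberately leaves its result in
      # GPU memory, even though the EagerPyFunc kernel below runs on the CPU.
      with tf.device("/device:GPU:0"):
        return x + 1.0

    with tf.device("/cpu:0"):
      x = tf.constant(41.0)
      # Before this commit, the GPU-backed result was not copied back to the
      # CPU, so the kernel could dereference GPU memory as if it were host
      # memory; with this change, the trampoline copies the result and the
      # kernel validates its placement.
      y = tf.contrib.eager.py_func(add_one_on_gpu, [x], tf.float32)
      print(y)  # tf.Tensor(42.0, ...)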
Diffstat (limited to 'tensorflow/python/lib')
-rw-r--r--  tensorflow/python/lib/core/py_func.cc | 70
1 file changed, 48 insertions(+), 22 deletions(-)
diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc
index 30c1a9c759..57139986af 100644
--- a/tensorflow/python/lib/core/py_func.cc
+++ b/tensorflow/python/lib/core/py_func.cc
@@ -55,37 +55,35 @@ struct PyCall {
string token;
// The device on which Tensors are stored; only used for EagerPyFunc.
- Device* device;
-
- // True if and only if the op has been placed on a GPU.
- bool gpu;
+ Device* device = nullptr;
// True if the call is associated with an EagerPyFunc.
- bool eager;
+ bool eager = false;
// Inputs and outputs of this function invocation.
std::vector<Tensor> ins;
std::vector<Tensor> out;
};
+bool IsCPUDevice(const Device* d) {
+ return d == nullptr || d->tensorflow_gpu_device_info() == nullptr;
+}
+
// Given the 'call', prepares the token and inputs as a python tuple
// that is appropriate for calling the trampoline.
Status MakeArgTuple(const PyCall* call, PyObject** tuple) {
int64 n = call->ins.size();
PyObject* lst = PyList_New(n);
CHECK(lst);
+ // TFE_TensorHandle assumes that CPU is identified by nullptr.
+ Device* device = IsCPUDevice(call->device) ? nullptr : call->device;
for (int64 i = 0; i < n; ++i) {
PyObject* arg = nullptr;
const Tensor& t = call->ins[i];
if (call->eager) {
- if (call->gpu) {
- arg = EagerTensorFromHandle(
- new TFE_TensorHandle(t, call->device, call->device));
- } else {
- // TFE_TensorHandle assumes that CPU is identified by `nullptr`.
- arg = EagerTensorFromHandle(new TFE_TensorHandle(t, nullptr, nullptr));
- }
+ arg = EagerTensorFromHandle(new TFE_TensorHandle(t, device, device));
if (arg == nullptr) {
+ Py_DECREF(lst);
return errors::Internal("Unable to procure EagerTensor from Tensor.");
}
} else {
@@ -97,8 +95,9 @@ Status MakeArgTuple(const PyCall* call, PyObject** tuple) {
}
PyList_SetItem(lst, i, arg);
}
- *tuple = Py_BuildValue("(sON)", call->token.c_str(),
- call->gpu ? Py_True : Py_False, lst);
+ const char* device_name =
+ device == nullptr ? nullptr : device->attributes().name().c_str();
+ *tuple = Py_BuildValue("(ssN)", call->token.c_str(), device_name, lst);
CHECK(*tuple);
return Status::OK();
}
@@ -167,9 +166,40 @@ bool IsSingleNone(PyObject* obj) {
}
// Retrieves a Tensor from `eager_tensor` and stores it in `output_tensor`.
+// Validates that `output_tensor` is backed by memory in `expected_device`
+// (which is assumed to be a local device, one on which the kernel was
+// executed).
+//
+// It may be nice to copy the tensor to the right device instead of failing if
+// it isn't already there. This is left as a future exercise. The required
+// device-copying logic is implemented in Python at the moment.
tensorflow::Status ExtractTensorFromEagerTensor(const PyObject* eager_tensor,
+ const Device* expected_device,
const Tensor** output_tensor) {
- return EagerTensor_Handle(eager_tensor)->handle->Tensor(output_tensor);
+ auto handle = EagerTensor_Handle(eager_tensor)->handle;
+ Device* actual_device = nullptr;
+ TF_RETURN_IF_ERROR(handle->Device(&actual_device));
+ TF_RETURN_IF_ERROR(handle->Tensor(output_tensor));
+ // actual_device may be nullptr, which implies local CPU.
+ if (expected_device == actual_device) return Status::OK();
+ const string& expected_device_name = expected_device->attributes().name();
+ if (actual_device == nullptr) {
+ if (!IsCPUDevice(expected_device)) {
+ return errors::Internal(
+ "expected the py_func to return a Tensor backed by memory in ",
+ expected_device_name,
+ ", but is actually backed by local host memory. This is a bug.");
+ }
+ return Status::OK();
+ }
+ const string& actual_device_name = actual_device->attributes().name();
+ if (actual_device_name != expected_device_name) {
+ return errors::Internal(
+ "expected the py_func to return a Tensor backed by memory in ",
+ expected_device_name, ", but is actually in ", actual_device_name,
+ ". This is a bug.");
+ }
+ return Status::OK();
}
// Calls the registered py function through the trampoline.
@@ -224,7 +254,7 @@ Status DoCallPyFunc(PyCall* call, bool* out_log_on_error) {
const PyObject* item = PyList_GetItem(result, i);
if (EagerTensor_CheckExact(item)) {
const Tensor* tensor = nullptr;
- s = ExtractTensorFromEagerTensor(item, &tensor);
+ s = ExtractTensorFromEagerTensor(item, call->device, &tensor);
if (s.ok()) t = *tensor;
} else {
s = errors::FailedPrecondition(
@@ -245,7 +275,7 @@ Status DoCallPyFunc(PyCall* call, bool* out_log_on_error) {
DCHECK(call->eager);
if (result != Py_None) {
const Tensor* t = nullptr;
- s = ExtractTensorFromEagerTensor(result, &t);
+ s = ExtractTensorFromEagerTensor(result, call->device, &t);
if (s.ok()) call->out.push_back(*t);
}
} else if (PyArray_Check(result)) {
@@ -449,13 +479,11 @@ class PyFuncOp : public OpKernel {
explicit PyFuncOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
OP_REQUIRES_OK(ctx, ctx->GetAttr("token", &token_));
eager_ = type_string() == "EagerPyFunc";
- gpu_ = ctx->device_type().type_string() == DEVICE_GPU;
}
void Compute(OpKernelContext* ctx) override {
PyCall call;
call.token = token_;
- call.gpu = gpu_;
call.eager = eager_;
if (call.eager) {
// Eager's C API uses `Device`, whereas `OpKernelContext` stores a
@@ -464,6 +492,7 @@ class PyFuncOp : public OpKernel {
if (call.device == nullptr) {
ctx->CtxFailureWithWarning(
errors::Internal("Unrecognized device class"));
+ return;
}
}
@@ -508,9 +537,6 @@ class PyFuncOp : public OpKernel {
private:
string token_;
- // True if and only if this op has been placed on a GPU.
- bool gpu_;
-
// True if and only if this op should execute the python function eagerly,
// i.e., if and only if the eager attribute is set.
bool eager_;
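The Python half of this fix lives in script_ops.py and is not part of this diff. As a loose sketch only (hypothetical helper names, not the actual implementation), the trampoline that receives the (token, device_name, args) tuple built by MakeArgTuple above might do something like:

    import tensorflow as tf

    _funcs = {}  # hypothetical token -> Python callable registry

    def _eager_trampoline(token, device_name, args):
      # device_name is None when the kernel ran on the local CPU; otherwise
      # it is the full device string passed via Py_BuildValue("(ssN)", ...).
      outputs = _funcs[token](*args)  # assuming func returns a list of tensors
      # Copy each returned tensor onto the kernel's device ("/cpu:0" when
      # device_name is None) so that the ExtractTensorFromEagerTensor check
      # in py_func.cc passes. In eager mode, tf.identity under a device
      # scope performs the copy.
      with tf.device(device_name or "/cpu:0"):
        return [tf.identity(t) for t in outputs]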