Temporarily disable buffer forwarding for GPU memory, since it does not yield a large performance gain and can potentially introduce bugs in CUDA kernels that rely on reading their inputs through read-only (texture cache) memory.

Change: 148520565
author: A. Unique TensorFlower <gardener@tensorflow.org> 2017-02-24 16:51:27 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-02-24 17:09:10 -0800
commit: 736d1551d47c61c93b25943334d132aa0f06b2c2 (patch)
tree: 6f243bd85123f41b8367c705c60ddea37f87d82d
parent: b5295c9d406d56de5e8bf8d038bad34bb8ff40c5 (diff)
1 files changed, 12 insertions, 0 deletions
diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc
index 106fef16e3..2b0488d944 100644
--- a/tensorflow/core/framework/op_kernel.cc
+++ b/tensorflow/core/framework/op_kernel.cc
@@ -413,6 +413,18 @@ bool OpKernelContext::forward_input_to_output_with_shape(
       op_kernel().input_memory_types()[input_index]) {
     return false;
   }
+
+  // TODO(rmlarsen,zhengxq): Re-enable for GPU memory once kernels have been
+  // made forwarding-aware or decorated to expose which inputs they read
+  // through the read-only texture cache.
+  // TODO(rmlarsen): Short term, move disabling logic into the kernels
+  // themselves for fine-grained control.
+  DCHECK(params_->device != nullptr);
+  if (op_kernel().output_memory_types()[output_index] == DEVICE_MEMORY &&
+      params_->device->attributes().device_type() == DEVICE_GPU) {
+    return false;
+  }
+
   // Check that output allocator attributes are not more restrictive than
   // input allocator attributes.
   const auto input_attr = params_->input_alloc_attrs == nullptr
author	A. Unique TensorFlower <gardener@tensorflow.org>	2017-02-24 16:51:27 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-02-24 17:09:10 -0800
commit	736d1551d47c61c93b25943334d132aa0f06b2c2 (patch)
tree	6f243bd85123f41b8367c705c60ddea37f87d82d
parent	b5295c9d406d56de5e8bf8d038bad34bb8ff40c5 (diff)