From 736d1551d47c61c93b25943334d132aa0f06b2c2 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower" <gardener@tensorflow.org>
Date: Fri, 24 Feb 2017 16:51:27 -0800
Subject: Temporarily disable buffer forwarding for GPU memory, since it does
 not yield a large performance gain and can potentially introduce bugs in
 CUDA kernels that rely on reads from read-only (texture cache) memory.
 Change: 148520565

---
 tensorflow/core/framework/op_kernel.cc | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc
index 106fef16e3..2b0488d944 100644
--- a/tensorflow/core/framework/op_kernel.cc
+++ b/tensorflow/core/framework/op_kernel.cc
@@ -413,6 +413,18 @@ bool OpKernelContext::forward_input_to_output_with_shape(
       op_kernel().input_memory_types()[input_index]) {
     return false;
   }
+
+  // TODO(rmlarsen,zhengxq): Re-enable for GPU memory once kernels have been
+  // made forwarding-aware or decorated to expose which inputs they read
+  // through the read-only texture cache.
+  // TODO(rmlarsen): In the short term, move the disabling logic into the
+  // kernels themselves for fine-grained control.
+  DCHECK(params_->device != nullptr);
+  if (op_kernel().output_memory_types()[output_index] == DEVICE_MEMORY &&
+      params_->device->attributes().device_type() == DEVICE_GPU) {
+    return false;
+  }
+
   // Check that output allocator attributes are not more restrictive than
   // input allocator attributes.
   const auto input_attr = params_->input_alloc_attrs == nullptr
-- 
cgit v1.2.3