Fuse resize and mirror padding ops into convolutions

Spatial transformations like padding and bilinear resizing can be merged into the im2col stage of conv2d. On some models this considerably reduces memory usage (from 338MB to 224MB) and latency (by 15%), and it helps us avoid OOM crashes on iOS. This PR has all the changes needed to fuse these particular ops, including the kernels themselves and the integration into the optimize_for_inference script. Change: 132094335
author: Pete Warden <petewarden@google.com> 2016-09-02 12:00:41 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-09-02 13:03:02 -0700
commit: cb324446acbdf0d3d2129904361cf0bcbe53e852 (patch)
tree: aedd54ffe0e19d5ecf4d7f2f9d96686374dab80c /tensorflow/core/kernels/conv_ops.h
parent: 10451eb6cfe67a8277c39a2fd7848fbbef706f10 (diff)
1 files changed, 11 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/conv_ops.h b/tensorflow/core/kernels/conv_ops.h
index d09db3dc15..858be520b0 100644
--- a/tensorflow/core/kernels/conv_ops.h
+++ b/tensorflow/core/kernels/conv_ops.h
@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_KERNELS_CONV_OPS_H_
 
 #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/resource_mgr.h"
 #include "tensorflow/core/util/tensor_format.h"
 
 #if GOOGLE_CUDA
@@ -38,6 +39,16 @@ class LaunchConv2DOp {
               TensorFormat data_format);
 };
 
+// Resource wrapping a persistent `size`-element buffer of T used within the op.
+template <class T, size_t size>
+struct Im2ColBufferResource : public ResourceBase {
+  // Guards `data`: callers are expected to hold this mutex while using the
+  // buffer, so that only a single operation at a time touches the memory.
+  mutex mu;
+  T data[size];
+  string DebugString() { return "Im2ColBufferResource"; }
+};
+
 #ifdef GOOGLE_CUDA
 template <typename T>
 class LaunchConv2DOp<Eigen::GpuDevice, T> {
author	Pete Warden <petewarden@google.com>	2016-09-02 12:00:41 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2016-09-02 13:03:02 -0700
commit	cb324446acbdf0d3d2129904361cf0bcbe53e852 (patch)
tree	aedd54ffe0e19d5ecf4d7f2f9d96686374dab80c /tensorflow/core/kernels/conv_ops.h
parent	10451eb6cfe67a8277c39a2fd7848fbbef706f10 (diff)