about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/core/kernels/conv_ops.h
diff options
context:
space:
mode:
author: Pete Warden <petewarden@google.com> 2016-09-02 12:00:41 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-09-02 13:03:02 -0700
commit: cb324446acbdf0d3d2129904361cf0bcbe53e852 (patch)
tree: aedd54ffe0e19d5ecf4d7f2f9d96686374dab80c /tensorflow/core/kernels/conv_ops.h
parent: 10451eb6cfe67a8277c39a2fd7848fbbef706f10 (diff)
Fuse resize and mirror padding ops into convolutions
Spatial transformations like padding and bilinear resizing can be merged into the im2col stage of conv2d. This reduces the memory usage considerably (from 338MB to 224MB) and latency (by 15%) on some models, and helps us avoid OOM crashes on iOS. This PR has all the changes needed to fuse these particular ops, including the kernels themselves and integration into the optimize_for_inference script. Change: 132094335
Diffstat (limited to 'tensorflow/core/kernels/conv_ops.h')
-rw-r--r-- tensorflow/core/kernels/conv_ops.h 11
1 files changed, 11 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/conv_ops.h b/tensorflow/core/kernels/conv_ops.h
index d09db3dc15..858be520b0 100644
--- a/tensorflow/core/kernels/conv_ops.h
+++ b/tensorflow/core/kernels/conv_ops.h
@@ -17,6 +17,7 @@ limitations under the License.
#define TENSORFLOW_KERNELS_CONV_OPS_H_
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/resource_mgr.h"
#include "tensorflow/core/util/tensor_format.h"
#if GOOGLE_CUDA
@@ -38,6 +39,16 @@ class LaunchConv2DOp {
TensorFormat data_format);
};
+// Resource keeping a persistent memory buffer (`size` elements of T) for the op.
+template <class T, size_t size>
+struct Im2ColBufferResource : public ResourceBase {
+ // Meant to be held while `data` is in use, so that only a single operation at
+ // a time is able to use the buffer memory held by this resource.
+ mutex mu;
+ T data[size];  // Buffer storage, embedded directly in the resource object.
+ string DebugString() { return "Im2ColBufferResource"; }  // Fixed label.
+};
+
#ifdef GOOGLE_CUDA
template <typename T>
class LaunchConv2DOp<Eigen::GpuDevice, T> {