diff options
author | Eugene Brevdo <ebrevdo@gmail.com> | 2016-03-10 17:18:30 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2016-03-11 11:41:23 -0800 |
commit | 56f1d64998744ad655fe5c428658a13be35b865e (patch) | |
tree | 1c4e5ec1192835898b9e17f462cf62838534add2 /tensorflow/core/kernels/resize_nearest_neighbor_op.cc | |
parent | 64dd5b58d52d37697d5beb68e2177b966108e0a7 (diff) |
Fix dependency bugs
Change: 116925769
Diffstat (limited to 'tensorflow/core/kernels/resize_nearest_neighbor_op.cc')
-rw-r--r-- | tensorflow/core/kernels/resize_nearest_neighbor_op.cc | 99 |
1 file changed, 91 insertions, 8 deletions
diff --git a/tensorflow/core/kernels/resize_nearest_neighbor_op.cc b/tensorflow/core/kernels/resize_nearest_neighbor_op.cc index c3ed9914c9..059ef83bb0 100644 --- a/tensorflow/core/kernels/resize_nearest_neighbor_op.cc +++ b/tensorflow/core/kernels/resize_nearest_neighbor_op.cc @@ -26,6 +26,10 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" +#if GOOGLE_CUDA +#include "tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.h" +#endif // GOOGLE_CUDA + namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; @@ -58,10 +62,10 @@ class ResizeNearestNeighborOp : public OpKernel { // Initialize shape to the batch size of the input, then add // the rest of the dimensions Tensor* output = nullptr; - OP_REQUIRES_OK(context, context->allocate_output( - 0, TensorShape({input.dim_size(0), sizes(0), - sizes(1), input.dim_size(3)}), - &output)); + OP_REQUIRES_OK( + context, context->allocate_output(0, TensorShape({input.dim_size(0), sizes(0), + sizes(1), input.dim_size(3)}), + &output)); const int64 batch_size = input.dim_size(0); const int64 in_height = input.dim_size(1); @@ -132,10 +136,10 @@ class ResizeNearestNeighborOpGrad : public OpKernel { // Initialize shape to the batch size of the input, then add // the rest of the dimensions Tensor* output = nullptr; - OP_REQUIRES_OK(context, context->allocate_output( - 0, TensorShape({input.dim_size(0), sizes(0), - sizes(1), input.dim_size(3)}), - &output)); + OP_REQUIRES_OK( + context, context->allocate_output(0, TensorShape({input.dim_size(0), sizes(0), + sizes(1), input.dim_size(3)}), + &output)); const int64 batch_size = input.dim_size(0); const int64 in_height = input.dim_size(1); @@ -204,4 +208,83 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNEL); #undef REGISTER_KERNEL +#if GOOGLE_CUDA + +template <typename T> +class ResizeNearestNeighborGPUOp : public OpKernel { + public: + explicit ResizeNearestNeighborGPUOp(OpKernelConstruction* context) + : 
OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("align_corners", &align_corners_)); + } + + void Compute(OpKernelContext* context) override { + const Tensor& input = context->input(0); + OP_REQUIRES(context, input.dims() == 4, + errors::InvalidArgument("input must be 4-dimensional", + input.shape().DebugString())); + const Tensor& shape_t = context->input(1); + OP_REQUIRES(context, shape_t.dims() == 1, + errors::InvalidArgument("shape_t must be 1-dimensional", + shape_t.shape().DebugString())); + OP_REQUIRES(context, shape_t.NumElements() == 2, + errors::InvalidArgument("shape_t must have two elements", + shape_t.shape().DebugString())); + + auto sizes = shape_t.vec<int32>(); + OP_REQUIRES(context, sizes(0) > 0 && sizes(1) > 0, + errors::InvalidArgument("shape_t's elements must be positive")); + + // Initialize shape to the batch size of the input, then add + // the rest of the dimensions + Tensor* output = nullptr; + OP_REQUIRES_OK( + context, context->allocate_output(0, TensorShape({input.dim_size(0), sizes(0), + sizes(1), input.dim_size(3)}), + &output)); + + const int64 batch_size = input.dim_size(0); + const int64 in_height = input.dim_size(1); + const int64 in_width = input.dim_size(2); + const int64 channels = input.dim_size(3); + const int64 out_height = output->dim_size(1); + const int64 out_width = output->dim_size(2); + + const float height_scale = + (align_corners_ && out_height > 1) + ? (in_height - 1) / static_cast<float>(out_height - 1) + : in_height / static_cast<float>(out_height); + const float width_scale = + (align_corners_ && out_width > 1) + ? 
(in_width - 1) / static_cast<float>(out_width - 1) + : in_width / static_cast<float>(out_width); + + bool status = ResizeNearestNeighbor<T>( + input.flat<T>().data(), batch_size, in_height, + in_width, channels, out_height, out_width, + height_scale, width_scale, output->flat<T>().data(), + context->eigen_gpu_device()); + + if (!status) { + context->SetStatus( + errors::Internal("Failed launching ResizeNearestNeighbor")); + } + } + private: + bool align_corners_; +}; + +#define REGISTER_KERNEL(T) \ + REGISTER_KERNEL_BUILDER(Name("ResizeNearestNeighbor") \ + .Device(DEVICE_GPU) \ + .TypeConstraint<T>("T") \ + .HostMemory("size"), \ + ResizeNearestNeighborGPUOp<T>); + +TF_CALL_GPU_NUMBER_TYPES(REGISTER_KERNEL); + +#undef REGISTER_KERNEL + +#endif // GOOGLE_CUDA + } // namespace tensorflow |