author    Eugene Brevdo <ebrevdo@gmail.com>  2016-03-10 17:18:30 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>  2016-03-11 11:41:23 -0800
commit    56f1d64998744ad655fe5c428658a13be35b865e (patch)
tree      1c4e5ec1192835898b9e17f462cf62838534add2 /tensorflow/core/kernels/resize_nearest_neighbor_op.cc
parent    64dd5b58d52d37697d5beb68e2177b966108e0a7 (diff)
Fix dependency bugs
Change: 116925769
Diffstat (limited to 'tensorflow/core/kernels/resize_nearest_neighbor_op.cc')
-rw-r--r--  tensorflow/core/kernels/resize_nearest_neighbor_op.cc  99
1 file changed, 91 insertions(+), 8 deletions(-)
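
The change below guards a CUDA implementation of ResizeNearestNeighbor behind GOOGLE_CUDA, pulling in resize_nearest_neighbor_op_gpu.h and registering a DEVICE_GPU kernel that computes the same height/width scale factors as the existing CPU op. As orientation for the diff, here is a minimal, self-contained C++ sketch of that scale selection and nearest-neighbor index mapping; ComputeScale and NearestSourceIndex are hypothetical helper names used only for illustration and are not symbols from the patch.

#include <algorithm>
#include <cstdint>

// Scale factor selection mirrored from the op: with align_corners the first
// and last samples of input and output line up exactly; otherwise the ratio
// of the full extents is used.
inline float ComputeScale(int64_t in_size, int64_t out_size,
                          bool align_corners) {
  return (align_corners && out_size > 1)
             ? (in_size - 1) / static_cast<float>(out_size - 1)
             : in_size / static_cast<float>(out_size);
}

// Each output coordinate maps back to the nearest input coordinate
// (floor of out_index * scale), clamped to the valid input range.
inline int64_t NearestSourceIndex(int64_t out_index, float scale,
                                  int64_t in_size) {
  return std::min(static_cast<int64_t>(out_index * scale), in_size - 1);
}
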
diff --git a/tensorflow/core/kernels/resize_nearest_neighbor_op.cc b/tensorflow/core/kernels/resize_nearest_neighbor_op.cc
index c3ed9914c9..059ef83bb0 100644
--- a/tensorflow/core/kernels/resize_nearest_neighbor_op.cc
+++ b/tensorflow/core/kernels/resize_nearest_neighbor_op.cc
@@ -26,6 +26,10 @@ limitations under the License.
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/logging.h"
+#if GOOGLE_CUDA
+#include "tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.h"
+#endif // GOOGLE_CUDA
+
namespace tensorflow {
typedef Eigen::ThreadPoolDevice CPUDevice;
@@ -58,10 +62,10 @@ class ResizeNearestNeighborOp : public OpKernel {
// Initialize shape to the batch size of the input, then add
// the rest of the dimensions
Tensor* output = nullptr;
- OP_REQUIRES_OK(context, context->allocate_output(
- 0, TensorShape({input.dim_size(0), sizes(0),
- sizes(1), input.dim_size(3)}),
- &output));
+ OP_REQUIRES_OK(
+ context, context->allocate_output(0, TensorShape({input.dim_size(0), sizes(0),
+ sizes(1), input.dim_size(3)}),
+ &output));
const int64 batch_size = input.dim_size(0);
const int64 in_height = input.dim_size(1);
@@ -132,10 +136,10 @@ class ResizeNearestNeighborOpGrad : public OpKernel {
// Initialize shape to the batch size of the input, then add
// the rest of the dimensions
Tensor* output = nullptr;
- OP_REQUIRES_OK(context, context->allocate_output(
- 0, TensorShape({input.dim_size(0), sizes(0),
- sizes(1), input.dim_size(3)}),
- &output));
+ OP_REQUIRES_OK(
+ context, context->allocate_output(0, TensorShape({input.dim_size(0), sizes(0),
+ sizes(1), input.dim_size(3)}),
+ &output));
const int64 batch_size = input.dim_size(0);
const int64 in_height = input.dim_size(1);
@@ -204,4 +208,83 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNEL);
#undef REGISTER_KERNEL
+#if GOOGLE_CUDA
+
+template <typename T>
+class ResizeNearestNeighborGPUOp : public OpKernel {
+ public:
+ explicit ResizeNearestNeighborGPUOp(OpKernelConstruction* context)
+ : OpKernel(context) {
+ OP_REQUIRES_OK(context, context->GetAttr("align_corners", &align_corners_));
+ }
+
+ void Compute(OpKernelContext* context) override {
+ const Tensor& input = context->input(0);
+ OP_REQUIRES(context, input.dims() == 4,
+ errors::InvalidArgument("input must be 4-dimensional",
+ input.shape().DebugString()));
+ const Tensor& shape_t = context->input(1);
+ OP_REQUIRES(context, shape_t.dims() == 1,
+ errors::InvalidArgument("shape_t must be 1-dimensional",
+ shape_t.shape().DebugString()));
+ OP_REQUIRES(context, shape_t.NumElements() == 2,
+ errors::InvalidArgument("shape_t must have two elements",
+ shape_t.shape().DebugString()));
+
+ auto sizes = shape_t.vec<int32>();
+ OP_REQUIRES(context, sizes(0) > 0 && sizes(1) > 0,
+ errors::InvalidArgument("shape_t's elements must be positive"));
+
+ // Initialize shape to the batch size of the input, then add
+ // the rest of the dimensions
+ Tensor* output = nullptr;
+ OP_REQUIRES_OK(
+ context, context->allocate_output(0, TensorShape({input.dim_size(0), sizes(0),
+ sizes(1), input.dim_size(3)}),
+ &output));
+
+ const int64 batch_size = input.dim_size(0);
+ const int64 in_height = input.dim_size(1);
+ const int64 in_width = input.dim_size(2);
+ const int64 channels = input.dim_size(3);
+ const int64 out_height = output->dim_size(1);
+ const int64 out_width = output->dim_size(2);
+
+ const float height_scale =
+ (align_corners_ && out_height > 1)
+ ? (in_height - 1) / static_cast<float>(out_height - 1)
+ : in_height / static_cast<float>(out_height);
+ const float width_scale =
+ (align_corners_ && out_width > 1)
+ ? (in_width - 1) / static_cast<float>(out_width - 1)
+ : in_width / static_cast<float>(out_width);
+
+ bool status = ResizeNearestNeighbor<T>(
+ input.flat<T>().data(), batch_size, in_height,
+ in_width, channels, out_height, out_width,
+ height_scale, width_scale, output->flat<T>().data(),
+ context->eigen_gpu_device());
+
+ if (!status) {
+ context->SetStatus(
+ errors::Internal("Failed launching ResizeNearestNeighbor"));
+ }
+ }
+ private:
+ bool align_corners_;
+};
+
+#define REGISTER_KERNEL(T) \
+ REGISTER_KERNEL_BUILDER(Name("ResizeNearestNeighbor") \
+ .Device(DEVICE_GPU) \
+ .TypeConstraint<T>("T") \
+ .HostMemory("size"), \
+ ResizeNearestNeighborGPUOp<T>);
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_KERNEL);
+
+#undef REGISTER_KERNEL
+
+#endif // GOOGLE_CUDA
+
} // namespace tensorflow
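
The device-side entry point invoked above is declared in resize_nearest_neighbor_op_gpu.h, which is not part of this diff. A declaration consistent with the call site in Compute() (inferred here, so the authoritative signature may differ) would be:

// Inferred from the call site in ResizeNearestNeighborGPUOp::Compute(); the
// authoritative declaration lives in
// tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.h. Returns true if
// the CUDA kernel launch was issued successfully.
template <typename T>
bool ResizeNearestNeighbor(const T* bottom_data, const int64 batch,
                           const int64 in_height, const int64 in_width,
                           const int64 channels, const int64 out_height,
                           const int64 out_width, const float height_scale,
                           const float width_scale, T* top_data,
                           const Eigen::GpuDevice& d);

Note that the registration uses .HostMemory("size"), which keeps the two-element size input in host memory so the op can read the requested output dimensions when allocating the output tensor, without a device-to-host copy before the kernel launch.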