diff options
author | Eugene Brevdo <ebrevdo@gmail.com> | 2016-03-10 17:18:30 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2016-03-11 11:41:23 -0800 |
commit | 56f1d64998744ad655fe5c428658a13be35b865e (patch) | |
tree | 1c4e5ec1192835898b9e17f462cf62838534add2 /tensorflow/core/kernels/resize_nearest_neighbor_op.cc | |
parent | 64dd5b58d52d37697d5beb68e2177b966108e0a7 (diff) |
Fix dependency bugs
Change: 116925769
Diffstat (limited to 'tensorflow/core/kernels/resize_nearest_neighbor_op.cc')
-rw-r--r-- | tensorflow/core/kernels/resize_nearest_neighbor_op.cc | 99 |
1 file changed, 91 insertions, 8 deletions
diff --git a/tensorflow/core/kernels/resize_nearest_neighbor_op.cc b/tensorflow/core/kernels/resize_nearest_neighbor_op.cc index c3ed9914c9..059ef83bb0 100644 --- a/tensorflow/core/kernels/resize_nearest_neighbor_op.cc +++ b/tensorflow/core/kernels/resize_nearest_neighbor_op.cc @@ -26,6 +26,10 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" +#if GOOGLE_CUDA +#include "tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.h" +#endif // GOOGLE_CUDA + namespace tensorflow { typedef Eigen::ThreadPoolDevice CPUDevice; @@ -58,10 +62,10 @@ class ResizeNearestNeighborOp : public OpKernel { // Initialize shape to the batch size of the input, then add // the rest of the dimensions Tensor* output = nullptr; - OP_REQUIRES_OK(context, context->allocate_output( - 0, TensorShape({input.dim_size(0), sizes(0), - sizes(1), input.dim_size(3)}), - &output)); + OP_REQUIRES_OK( + context, context->allocate_output(0, TensorShape({input.dim_size(0), sizes(0), + sizes(1), input.dim_size(3)}), + &output)); const int64 batch_size = input.dim_size(0); const int64 in_height = input.dim_size(1); @@ -132,10 +136,10 @@ class ResizeNearestNeighborOpGrad : public OpKernel { // Initialize shape to the batch size of the input, then add // the rest of the dimensions Tensor* output = nullptr; - OP_REQUIRES_OK(context, context->allocate_output( - 0, TensorShape({input.dim_size(0), sizes(0), - sizes(1), input.dim_size(3)}), - &output)); + OP_REQUIRES_OK( + context, context->allocate_output(0, TensorShape({input.dim_size(0), sizes(0), + sizes(1), input.dim_size(3)}), + &output)); const int64 batch_size = input.dim_size(0); const int64 in_height = input.dim_size(1); @@ -204,4 +208,83 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNEL); #undef REGISTER_KERNEL +#if GOOGLE_CUDA + +template <typename T> +class ResizeNearestNeighborGPUOp : public OpKernel { + public: + explicit ResizeNearestNeighborGPUOp(OpKernelConstruction* context) + : 
OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("align_corners", &align_corners_)); + } + + void Compute(OpKernelContext* context) override { + const Tensor& input = context->input(0); + OP_REQUIRES(context, input.dims() == 4, + errors::InvalidArgument("input must be 4-dimensional", + input.shape().DebugString())); + const Tensor& shape_t = context->input(1); + OP_REQUIRES(context, shape_t.dims() == 1, + errors::InvalidArgument("shape_t must be 1-dimensional", + shape_t.shape().DebugString())); + OP_REQUIRES(context, shape_t.NumElements() == 2, + errors::InvalidArgument("shape_t must have two elements", + shape_t.shape().DebugString())); + + auto sizes = shape_t.vec<int32>(); + OP_REQUIRES(context, sizes(0) > 0 && sizes(1) > 0, + errors::InvalidArgument("shape_t's elements must be positive")); + + // Initialize shape to the batch size of the input, then add + // the rest of the dimensions + Tensor* output = nullptr; + OP_REQUIRES_OK( + context, context->allocate_output(0, TensorShape({input.dim_size(0), sizes(0), + sizes(1), input.dim_size(3)}), + &output)); + + const int64 batch_size = input.dim_size(0); + const int64 in_height = input.dim_size(1); + const int64 in_width = input.dim_size(2); + const int64 channels = input.dim_size(3); + const int64 out_height = output->dim_size(1); + const int64 out_width = output->dim_size(2); + + const float height_scale = + (align_corners_ && out_height > 1) + ? (in_height - 1) / static_cast<float>(out_height - 1) + : in_height / static_cast<float>(out_height); + const float width_scale = + (align_corners_ && out_width > 1) + ? 
(in_width - 1) / static_cast<float>(out_width - 1) + : in_width / static_cast<float>(out_width); + + bool status = ResizeNearestNeighbor<T>( + input.flat<T>().data(), batch_size, in_height, + in_width, channels, out_height, out_width, + height_scale, width_scale, output->flat<T>().data(), + context->eigen_gpu_device()); + + if (!status) { + context->SetStatus( + errors::Internal("Failed launching ResizeNearestNeighbor")); + } + } + private: + bool align_corners_; +}; + +#define REGISTER_KERNEL(T) \ + REGISTER_KERNEL_BUILDER(Name("ResizeNearestNeighbor") \ + .Device(DEVICE_GPU) \ + .TypeConstraint<T>("T") \ + .HostMemory("size"), \ + ResizeNearestNeighborGPUOp<T>); + +TF_CALL_GPU_NUMBER_TYPES(REGISTER_KERNEL); + +#undef REGISTER_KERNEL + +#endif // GOOGLE_CUDA + } // namespace tensorflow |