diff options
Diffstat (limited to 'tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.cu.cc')
-rw-r--r-- | tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.cu.cc | 86 |
1 files changed, 86 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.cu.cc b/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.cu.cc new file mode 100644 index 0000000000..bee24a5b02 --- /dev/null +++ b/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.cu.cc @@ -0,0 +1,86 @@ +/* Copyright 2015 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#if GOOGLE_CUDA + +#define EIGEN_USE_GPU + +#include <stdio.h> + +#include "tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.h" + +#include "tensorflow/core/framework/register_types.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" + +namespace tensorflow { +namespace { + +template <typename T> +__global__ void ResizeNearestNeighborNHWC(const int nthreads, const T* bottom_data, + const int in_height, const int in_width, + const int channels, const int out_height, + const int out_width, const float height_scale, + const float width_scale, T* top_data) { + CUDA_1D_KERNEL_LOOP(index, nthreads) { + int n = index; + int c = n % channels; + n /= channels; + int out_x = n % out_width; + n /= out_width; + int out_y = n % out_height; + n /= out_height; + + const T* bottom_data_n = bottom_data + n * channels * in_height * in_width; + const int in_x = min(static_cast<int>(floorf(out_x * width_scale)), in_width - 1); + const int in_y = min(static_cast<int>(floorf(out_y * height_scale)), in_height - 1); + const int idx = (in_y * in_width + in_x) * channels + c; + top_data[index] = ldg(bottom_data_n + idx); + } +} + +} // namespace + +template <typename T> +bool ResizeNearestNeighbor(const T* bottom_data, const int batch, + const int in_height, const int in_width, + const int channels, const int out_height, + const int out_width, const float height_scale, + const float width_scale, T* top_data, + const Eigen::GpuDevice& d) { + const int output_size = batch * channels * out_height * out_width; + CudaLaunchConfig config = GetCudaLaunchConfig(output_size, d); + + ResizeNearestNeighborNHWC<T> + <<<config.block_count, config.thread_per_block, 0, d.stream()>>>( + output_size, bottom_data, in_height, in_width, channels, out_height, + out_width, height_scale, width_scale, top_data); + return d.ok(); +} + +#define DECLARE_GPU_SPEC(T) \ + template bool ResizeNearestNeighbor(const T* bottom_data, const int batch, \ + const int in_height, const int in_width, \ + const int channels, const int out_height, \ + const int out_width, const float height_scale, \ + const float width_scale, T* top_data, \ + const Eigen::GpuDevice& d); + +TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC); + +#undef DECLARE_GPU_SPEC +} // end namespace tensorflow + +#endif // GOOGLE_CUDA |