about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.cu.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.cu.cc')
-rw-r--r-- tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.cu.cc 86
1 files changed, 86 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.cu.cc b/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.cu.cc
new file mode 100644
index 0000000000..bee24a5b02
--- /dev/null
+++ b/tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.cu.cc
@@ -0,0 +1,86 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include <stdio.h>
+
+#include "tensorflow/core/kernels/resize_nearest_neighbor_op_gpu.h"
+
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor_types.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+namespace {
+
+// Nearest-neighbor resize kernel for data laid out as
+// [batch, height, width, channels] (channels vary fastest).
+//
+// Each of the `nthreads` output elements is handled by one iteration of
+// CUDA_1D_KERNEL_LOOP (helper macro defined outside this file; presumably it
+// spreads the iterations over the launched threads -- confirm in
+// cuda_kernel_helper.h). For an output element at (batch, out_y, out_x, c) the
+// source pixel is
+//   in_y = min(floor(out_y * height_scale), in_height - 1)
+//   in_x = min(floor(out_x * width_scale),  in_width  - 1)
+// and the value at (batch, in_y, in_x, c) of `bottom_data` is copied to
+// `top_data[index]`.
+//
+// All index arithmetic is done in `int`.
+template <typename T>
+__global__ void ResizeNearestNeighborNHWC(
+    const int nthreads, const T* bottom_data, const int in_height,
+    const int in_width, const int channels, const int out_height,
+    const int out_width, const float height_scale, const float width_scale,
+    T* top_data) {
+  CUDA_1D_KERNEL_LOOP(index, nthreads) {
+    // Peel the flat output index apart, fastest-varying dimension first:
+    // channel, then output column, then output row, then batch entry.
+    const int flat = index;
+    const int channel = flat % channels;
+    const int pixel = flat / channels;
+    const int dst_col = pixel % out_width;
+    const int row_and_batch = pixel / out_width;
+    const int dst_row = row_and_batch % out_height;
+    const int batch_idx = row_and_batch / out_height;
+
+    // Map the output coordinate back to the input grid; the clamp keeps the
+    // source coordinate on the last valid row/column.
+    const int src_row = min(static_cast<int>(floorf(dst_row * height_scale)),
+                            in_height - 1);
+    const int src_col = min(static_cast<int>(floorf(dst_col * width_scale)),
+                            in_width - 1);
+
+    // Start of this batch entry's image, then the element offset inside it.
+    const T* image = bottom_data + batch_idx * channels * in_height * in_width;
+    const int offset = (src_row * in_width + src_col) * channels + channel;
+
+    // `ldg` comes from the included helper header (presumably a read-only
+    // cached load -- confirm there).
+    top_data[index] = ldg(image + offset);
+  }
+}
+
+} // namespace
+
+// Enqueues a nearest-neighbor resize of `bottom_data` into `top_data` on the
+// stream of GPU device `d`.
+//
+// Arguments:
+//   bottom_data:  device pointer to the input, indexed by the kernel as
+//                 [batch, in_height, in_width, channels].
+//   batch, in_height, in_width, channels: input dimensions.
+//   out_height, out_width: spatial dimensions of the output.
+//   height_scale, width_scale: factors that map an output row/column index to
+//                 an input row/column index (input = floor(output * scale)).
+//   top_data:     device pointer to the output, holding
+//                 batch * out_height * out_width * channels elements.
+//   d:            device whose stream receives the kernel launch.
+//
+// Returns true if there is nothing to compute; otherwise returns `d.ok()`
+// queried right after the launch has been enqueued. The launch is
+// asynchronous: this function does not wait for the kernel to finish.
+template <typename T>
+bool ResizeNearestNeighbor(const T* bottom_data, const int batch,
+                           const int in_height, const int in_width,
+                           const int channels, const int out_height,
+                           const int out_width, const float height_scale,
+                           const float width_scale, T* top_data,
+                           const Eigen::GpuDevice& d) {
+  const int output_size = batch * channels * out_height * out_width;
+
+  // An empty output needs no work. Skip the launch entirely: a launch
+  // configuration derived from zero work items can end up with a zero-block
+  // grid, which CUDA rejects as an invalid configuration.
+  if (output_size == 0) {
+    return true;
+  }
+
+  // One logical work item per output element.
+  CudaLaunchConfig config = GetCudaLaunchConfig(output_size, d);
+
+  ResizeNearestNeighborNHWC<T>
+      <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+          output_size, bottom_data, in_height, in_width, channels, out_height,
+          out_width, height_scale, width_scale, top_data);
+  return d.ok();
+}
+
+// Explicitly instantiates ResizeNearestNeighbor<T> in this translation unit
+// for each type that TF_CALL_GPU_NUMBER_TYPES passes to the macro below (the
+// type list is defined outside this file). The helper macro is local to this
+// file and is removed again right after use.
+#define INSTANTIATE_RESIZE_NEAREST_NEIGHBOR_GPU(T)                          \
+  template bool ResizeNearestNeighbor<T>(                                   \
+      const T*, const int, const int, const int, const int, const int,      \
+      const int, const float, const float, T*, const Eigen::GpuDevice&);
+
+TF_CALL_GPU_NUMBER_TYPES(INSTANTIATE_RESIZE_NEAREST_NEIGHBOR_GPU);
+
+#undef INSTANTIATE_RESIZE_NEAREST_NEIGHBOR_GPU
+} // end namespace tensorflow
+
+#endif // GOOGLE_CUDA