aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/kernels/bucketize_op_gpu.cu.cc')
-rw-r--r--tensorflow/core/kernels/bucketize_op_gpu.cu.cc101
1 files changed, 101 insertions, 0 deletions
diff --git a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
new file mode 100644
index 0000000000..aafbbe41b4
--- /dev/null
+++ b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc
@@ -0,0 +1,101 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/kernels/bucketize_op.h"
+#include "tensorflow/core/kernels/cuda_device_array.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/util/cuda_kernel_helper.h"
+
+namespace tensorflow {
+
+typedef Eigen::GpuDevice GPUDevice;
+
+template <typename T>
+__global__ void BucketizeCustomKernel(
+ const int32 size_in, const T* in, const int32 size_boundaries,
+ CudaDeviceArrayStruct<float> boundaries_array, int32* out) {
+ const float* boundaries = GetCudaDeviceArrayOnDevice(&boundaries_array);
+ CUDA_1D_KERNEL_LOOP(i, size_in) {
+ T value = in[i];
+ int32 bucket = 0;
+ int32 count = size_boundaries;
+ while (count > 0) {
+ int32 l = bucket;
+ int32 step = count / 2;
+ l += step;
+ if (!(value < static_cast<T>(boundaries[l]))) {
+ bucket = ++l;
+ count -= step + 1;
+ } else {
+ count = step;
+ }
+ }
+ out[i] = bucket;
+ }
+}
+
+namespace functor {
+
+template <typename T>
+struct BucketizeFunctor<GPUDevice, T> {
+ // PRECONDITION: boundaries_vector must be sorted.
+ static Status Compute(OpKernelContext* context,
+ const typename TTypes<T, 1>::ConstTensor& input,
+ const std::vector<float>& boundaries_vector,
+ typename TTypes<int32, 1>::Tensor& output) {
+ const GPUDevice& d = context->eigen_device<GPUDevice>();
+
+ CudaDeviceArrayOnHost<float> boundaries_array(context,
+ boundaries_vector.size());
+ TF_RETURN_IF_ERROR(boundaries_array.Init());
+ for (int i = 0; i < boundaries_vector.size(); ++i) {
+ boundaries_array.Set(i, boundaries_vector[i]);
+ }
+ TF_RETURN_IF_ERROR(boundaries_array.Finalize());
+
+ CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d);
+ BucketizeCustomKernel<
+ T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+ input.size(), input.data(), boundaries_vector.size(),
+ boundaries_array.data(), output.data());
+
+ return Status::OK();
+ }
+};
+} // namespace functor
+
+#define REGISTER_GPU_SPEC(type) \
+ template struct functor::BucketizeFunctor<GPUDevice, type>;
+
+REGISTER_GPU_SPEC(int32);
+REGISTER_GPU_SPEC(int64);
+REGISTER_GPU_SPEC(float);
+REGISTER_GPU_SPEC(double);
+#undef REGISTER_GPU_SPEC
+
+} // namespace tensorflow
+
+#endif // GOOGLE_CUDA