diff options
Diffstat (limited to 'tensorflow/core/kernels/bucketize_op_gpu.cu.cc')
-rw-r--r-- | tensorflow/core/kernels/bucketize_op_gpu.cu.cc | 101 |
1 files changed, 0 insertions, 101 deletions
diff --git a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc b/tensorflow/core/kernels/bucketize_op_gpu.cu.cc deleted file mode 100644 index aafbbe41b4..0000000000 --- a/tensorflow/core/kernels/bucketize_op_gpu.cu.cc +++ /dev/null @@ -1,101 +0,0 @@ -/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#if GOOGLE_CUDA - -#define EIGEN_USE_GPU - -#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" - -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/bucketize_op.h" -#include "tensorflow/core/kernels/cuda_device_array.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow/core/util/cuda_kernel_helper.h" - -namespace tensorflow { - -typedef Eigen::GpuDevice GPUDevice; - -template <typename T> -__global__ void BucketizeCustomKernel( - const int32 size_in, const T* in, const int32 size_boundaries, - CudaDeviceArrayStruct<float> boundaries_array, int32* out) { - const float* boundaries = GetCudaDeviceArrayOnDevice(&boundaries_array); - CUDA_1D_KERNEL_LOOP(i, size_in) { - T value = in[i]; - int32 bucket = 0; - int32 count = size_boundaries; - while (count > 0) { - int32 l = bucket; - int32 step = count / 2; - l += step; - if (!(value < static_cast<T>(boundaries[l]))) { - bucket = ++l; - count -= step + 1; - } else { - count = step; - } - } - out[i] = bucket; - } -} - -namespace functor { - -template <typename T> -struct BucketizeFunctor<GPUDevice, T> { - // PRECONDITION: boundaries_vector must be sorted. - static Status Compute(OpKernelContext* context, - const typename TTypes<T, 1>::ConstTensor& input, - const std::vector<float>& boundaries_vector, - typename TTypes<int32, 1>::Tensor& output) { - const GPUDevice& d = context->eigen_device<GPUDevice>(); - - CudaDeviceArrayOnHost<float> boundaries_array(context, - boundaries_vector.size()); - TF_RETURN_IF_ERROR(boundaries_array.Init()); - for (int i = 0; i < boundaries_vector.size(); ++i) { - boundaries_array.Set(i, boundaries_vector[i]); - } - TF_RETURN_IF_ERROR(boundaries_array.Finalize()); - - CudaLaunchConfig config = GetCudaLaunchConfig(input.size(), d); - BucketizeCustomKernel< - T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>( - input.size(), input.data(), boundaries_vector.size(), - boundaries_array.data(), output.data()); - - return Status::OK(); - } -}; -} // namespace functor - -#define REGISTER_GPU_SPEC(type) \ - template struct functor::BucketizeFunctor<GPUDevice, type>; - -REGISTER_GPU_SPEC(int32); -REGISTER_GPU_SPEC(int64); -REGISTER_GPU_SPEC(float); -REGISTER_GPU_SPEC(double); -#undef REGISTER_GPU_SPEC - -} // namespace tensorflow - -#endif // GOOGLE_CUDA |