diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2018-01-26 05:15:18 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-01-26 05:22:51 -0800 |
commit | abdc62aee1eeba32be56d761a2f9988306356084 (patch) | |
tree | d49fa0848904b212443245702923255ba18cca58 /tensorflow/contrib/reduce_slice_ops | |
parent | c8c2e4932afccb594bfe05e22facea1aba9dd454 (diff) |
Roll CL 179861781 forward with fix: Wrappers for CUDA 9 warp-synchronous intrinsics.
PiperOrigin-RevId: 183374082
Diffstat (limited to 'tensorflow/contrib/reduce_slice_ops')
-rw-r--r-- | tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc | 11 |
1 files changed, 6 insertions, 5 deletions
diff --git a/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc b/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc index 8e6870fadd..501cddb8c8 100644 --- a/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc +++ b/tensorflow/contrib/reduce_slice_ops/kernels/reduce_slice_ops_gpu.cu.cc @@ -34,9 +34,9 @@ namespace functor { __global__ void ReduceSliceDeviceKernel##reduceop( \ Cuda3DLaunchConfig config, Index indices_width, Index bound, \ const T begin, const Index *indices, const T *input, T *out) { \ - CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count, x) { \ - CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count, y) { \ - CUDA_AXIS_KERNEL_LOOP(z, config.virtual_thread_count, z) { \ + CUDA_AXIS_KERNEL_LOOP(x, config.virtual_thread_count.x, X) { \ + CUDA_AXIS_KERNEL_LOOP(y, config.virtual_thread_count.y, Y) { \ + CUDA_AXIS_KERNEL_LOOP(z, config.virtual_thread_count.z, Z) { \ Index outidx = x * config.virtual_thread_count.y * \ config.virtual_thread_count.z + \ y * config.virtual_thread_count.z + z; \ @@ -68,8 +68,9 @@ namespace functor { if (sizex * sizey * sizez == 0) { \ return; \ } \ - Cuda3DLaunchConfig config = GetCuda3DLaunchConfig(sizex, sizey, sizez, d,\ - ReduceSliceDeviceKernel##reduceop<T, Index>, 0, 0); \ + Cuda3DLaunchConfig config = GetCuda3DLaunchConfig( \ + sizex, sizey, sizez, d, ReduceSliceDeviceKernel##reduceop<T, Index>, \ + 0, 0); \ \ ReduceSliceDeviceKernel##reduceop<T, Index> \ <<<config.block_count, config.thread_per_block, 0, d.stream()>>>( \ |