diff options
author | Benoit Steiner <benoit.steiner.goog@gmail.com> | 2016-03-11 09:28:59 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2016-03-11 11:42:23 -0800 |
commit | 0e1a324c7bb73d80e0e2d069396d61115e0d9096 (patch) | |
tree | c56db436136b2f44d24b0c9d65f4a3a481e06a88 | |
parent | fc53648e0c0f5110bfab75a02b9dc75260b913d3 (diff) |
Improved the performance of the slicing operation on GPU
Change: 116979767
-rw-r--r-- | tensorflow/core/kernels/slice_op.h | 14 |
1 file changed, 13 insertions, 1 deletion
```diff
diff --git a/tensorflow/core/kernels/slice_op.h b/tensorflow/core/kernels/slice_op.h
index 67f9ad31b1..a906724be9 100644
--- a/tensorflow/core/kernels/slice_op.h
+++ b/tensorflow/core/kernels/slice_op.h
@@ -30,7 +30,19 @@ struct Slice {
                   typename TTypes<T, NDIMS>::ConstTensor input,
                   const Eigen::DSizes<Eigen::DenseIndex, NDIMS>& slice_indices,
                   const Eigen::DSizes<Eigen::DenseIndex, NDIMS>& slice_sizes) {
-    output.device(d) = input.slice(slice_indices, slice_sizes);
+    if (Eigen::internal::is_same<Device, Eigen::GpuDevice>::value) {
+      Eigen::DSizes<int, NDIMS> indices;
+      for (int i = 0; i < NDIMS; ++i) {
+        indices[i] = slice_indices[i];
+      }
+      Eigen::DSizes<int, NDIMS> sizes;
+      for (int i = 0; i < NDIMS; ++i) {
+        sizes[i] = slice_sizes[i];
+      }
+      To32Bit(output).device(d) = To32Bit(input).slice(indices, sizes);
+    } else {
+      output.device(d) = input.slice(slice_indices, slice_sizes);
+    }
   }
 };
 
```