aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-03-11 09:28:59 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-03-11 11:42:23 -0800
commit: 0e1a324c7bb73d80e0e2d069396d61115e0d9096 (patch)
tree: c56db436136b2f44d24b0c9d65f4a3a481e06a88
parent: fc53648e0c0f5110bfab75a02b9dc75260b913d3 (diff)
Improved the performance of the slicing operation on GPU
Change: 116979767
-rw-r--r-- tensorflow/core/kernels/slice_op.h 14
1 file changed, 13 insertions, 1 deletion
diff --git a/tensorflow/core/kernels/slice_op.h b/tensorflow/core/kernels/slice_op.h
index 67f9ad31b1..a906724be9 100644
--- a/tensorflow/core/kernels/slice_op.h
+++ b/tensorflow/core/kernels/slice_op.h
@@ -30,7 +30,19 @@ struct Slice {
typename TTypes<T, NDIMS>::ConstTensor input,
const Eigen::DSizes<Eigen::DenseIndex, NDIMS>& slice_indices,
const Eigen::DSizes<Eigen::DenseIndex, NDIMS>& slice_sizes) {
- output.device(d) = input.slice(slice_indices, slice_sizes);
+ if (Eigen::internal::is_same<Device, Eigen::GpuDevice>::value) {
+ Eigen::DSizes<int, NDIMS> indices;
+ for (int i = 0; i < NDIMS; ++i) {
+ indices[i] = slice_indices[i];
+ }
+ Eigen::DSizes<int, NDIMS> sizes;
+ for (int i = 0; i < NDIMS; ++i) {
+ sizes[i] = slice_sizes[i];
+ }
+ To32Bit(output).device(d) = To32Bit(input).slice(indices, sizes);
+ } else {
+ output.device(d) = input.slice(slice_indices, slice_sizes);
+ }
}
};