about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Benoit Steiner <benoit.steiner.goog@gmail.com> 2016-03-09 20:04:53 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-03-09 20:14:04 -0800
commit: c258bca8b08eb1a51e02c4ac1900daf1603850a0 (patch)
tree: e8873db38408fddf792698a3cc69be30c8fc737e
parent: 9f0b11170d6e88c7afb3b11f6165d21144b0e59b (diff)
Use 32-bit indexing to pad tensors. This more than doubles the performance of
the corresponding TensorFlow operation.
Change: 116831272
-rw-r--r-- tensorflow/core/kernels/pad_op.h | 6
1 files changed, 5 insertions, 1 deletions
diff --git a/tensorflow/core/kernels/pad_op.h b/tensorflow/core/kernels/pad_op.h
index cdcd248bd9..fcc808b01f 100644
--- a/tensorflow/core/kernels/pad_op.h
+++ b/tensorflow/core/kernels/pad_op.h
@@ -32,7 +32,11 @@ struct Pad {
void operator()(const Device& d, typename TTypes<T, Dims>::Tensor output,
typename TTypes<T, Dims>::ConstTensor input,
Eigen::array<std::pair<int32, int32>, Dims> paddings) {
- output.device(d) = input.pad(paddings);
+ if (Eigen::internal::is_same<Device, Eigen::GpuDevice>::value) {
+ To32Bit(output).device(d) = To32Bit(input).pad(paddings);
+ } else {
+ output.device(d) = input.pad(paddings);
+ }
}
};