about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: Justin Lebar <jlebar@google.com> 2018-09-25 17:20:42 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-09-25 17:25:18 -0700
commit 05d103bf25110157c34b9ea6420061a23aa6d4ec (patch)
tree 2b73a1666f7cfdab5cd34bedc9cdda8cc7316852
parent 4177bc92c3b9301877521ba9b26377b80fa27601 (diff)
[XLA:GPU] Pad convolution features of size 3 up to 4.
PiperOrigin-RevId: 214532043
-rw-r--r-- tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc 23
1 files changed, 20 insertions, 3 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc b/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
index 2d270f630b..e3869b5c36 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
+++ b/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
@@ -37,15 +37,32 @@ static constexpr int64 kDesiredNumFeaturesFactor = 8;
// there's additional room for speedups. Achieving those speedups without also
// slowing other things down will likely require a more sophisticated heuristic,
// possibly some form of auto-tuning.
-static constexpr double kMaxBytesTouchedIncrease = 1.2;
+//
+// This value should be >= 4/3, otherwise the "dims of size 3 padded up to 4"
+// special case inside PadShape won't fire.
+static constexpr double kMaxBytesTouchedIncrease = 1.35;
// Pads the given dimensions in the given shape up to a multiple of
// kDesiredNumFeaturesFactor.
static Shape PadShape(Shape s, absl::Span<const int64> dims) {
for (int64 dim : dims) {
int64 dim_to_pad_size = s.dimensions(dim);
- int64 new_dim_to_pad_size =
- RoundUpToNearest(dim_to_pad_size, kDesiredNumFeaturesFactor);
+
+ // Round dim_to_pad_size up to the next multiple of
+ // kDesiredNumFeaturesFactor.
+ //
+ // Special case: dims of size 3 are rounded up to 4, not
+ // kDesiredNumFeaturesFactor. Empirically (and on the advice of nvidia),
+ // this helps, but as of writing, it's not supported by anything in the
+ // cudnn docs.
+ int64 new_dim_to_pad_size;
+ if (dim_to_pad_size == 3) {
+ new_dim_to_pad_size = 4;
+ } else {
+ new_dim_to_pad_size =
+ RoundUpToNearest(dim_to_pad_size, kDesiredNumFeaturesFactor);
+ }
+
s.set_dimensions(dim, new_dim_to_pad_size);
}
return s;