diff options
author | Justin Lebar <jlebar@google.com> | 2018-09-25 17:20:42 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-09-25 17:25:18 -0700 |
commit | 05d103bf25110157c34b9ea6420061a23aa6d4ec (patch) | |
tree | 2b73a1666f7cfdab5cd34bedc9cdda8cc7316852 /tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc | |
parent | 4177bc92c3b9301877521ba9b26377b80fa27601 (diff) |
[XLA:GPU] Pad convolution features of size 3 up to 4.
PiperOrigin-RevId: 214532043
Diffstat (limited to 'tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc')
-rw-r--r-- | tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc | 23 |
1 files changed, 20 insertions, 3 deletions
```diff
diff --git a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc b/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
index 2d270f630b..e3869b5c36 100644
--- a/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
+++ b/tensorflow/compiler/xla/service/gpu/pad_for_tensor_cores.cc
@@ -37,15 +37,32 @@ static constexpr int64 kDesiredNumFeaturesFactor = 8;
 // there's additional room for speedups. Achieving those speedups without also
 // slowing other things down will likely require a more sophisticated heuristic,
 // possibly some form of auto-tuning.
-static constexpr double kMaxBytesTouchedIncrease = 1.2;
+//
+// This value should be >= 4/3, otherwise the "dims of size 3 padded up to 4"
+// special case inside PadShape won't fire.
+static constexpr double kMaxBytesTouchedIncrease = 1.35;
 
 // Pads the given dimensions in the given shape up to a multiple of
 // kDesiredNumFeaturesFactor.
 static Shape PadShape(Shape s, absl::Span<const int64> dims) {
   for (int64 dim : dims) {
     int64 dim_to_pad_size = s.dimensions(dim);
-    int64 new_dim_to_pad_size =
-        RoundUpToNearest(dim_to_pad_size, kDesiredNumFeaturesFactor);
+
+    // Round dim_to_pad_size up to the next multiple of
+    // kDesiredNumFeaturesFactor.
+    //
+    // Special case: dims of size 3 are rounded up to 4, not
+    // kDesiredNumFeaturesFactor. Empirically (and on the advice of nvidia),
+    // this helps, but as of writing, it's not supported by anything in the
+    // cudnn docs.
+    int64 new_dim_to_pad_size;
+    if (dim_to_pad_size == 3) {
+      new_dim_to_pad_size = 4;
+    } else {
+      new_dim_to_pad_size =
+          RoundUpToNearest(dim_to_pad_size, kDesiredNumFeaturesFactor);
+    }
+
     s.set_dimensions(dim, new_dim_to_pad_size);
   }
   return s;
```