diff options
Diffstat (limited to 'tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc')
-rw-r--r-- | tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc | 2 |
1 files changed, 1 insertions, 1 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
index 8bf62dde8b..09ef62c87f 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_layout_assignment.cc
@@ -51,7 +51,7 @@ HeuristicLayoutAssignment(const HloInstruction* instr,
  // H <=> Y
  // W <=> X
  //
- // Therefore kOutputInputYX means NHWC; kBatchDepthYX means NCHW.
+ // Therefore kOutputInputYX and kBatchDepthYX mean NCHW.
 
  // As of today, our empirical evidence is that cudnn 7.0 is faster on V100 x
  // fp16 with the mostly-NHWC layout. The heuristic may change as cudnn version