Add autotuning code for matmul operator.

Currently it is turned off by default. PiperOrigin-RevId: 162423171
author: Yangzihao Wang <yangzihao@google.com> 2017-07-18 16:48:30 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-07-18 16:52:54 -0700
commit: 06acccabcb41513c76bbfffcd17817a7b136494b (patch)
tree: 82c2b379a5d8d4aa6cbbb653d3020ee2504bbd58 /tensorflow/core/util
parent: 11dff5b05b3488520d3a415173d73ae91fded092 (diff)
2 files changed, 79 insertions, 0 deletions
diff --git a/tensorflow/core/util/matmul_autotune.cc b/tensorflow/core/util/matmul_autotune.cc
new file mode 100644
index 0000000000..741a78a193
--- /dev/null
+++ b/tensorflow/core/util/matmul_autotune.cc
@@ -0,0 +1,51 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/util/matmul_autotune.h"
+
+#include "tensorflow/core/framework/types.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/util/env_var.h"
+
+namespace tensorflow {
+// Reports whether matmul autotuning is requested through the environment
+// variable TF_MATMUL_AUTOTUNE_ENABLE, with false passed as the default.
+// Read errors are only logged; the flag is pre-set so the result is defined.
+bool MatmulAutotuneEnable() {
+  bool enabled = false;
+  Status s = ReadBoolFromEnvVar("TF_MATMUL_AUTOTUNE_ENABLE", false, &enabled);
+  if (!s.ok()) LOG(ERROR) << s.error_message();
+  return enabled;
+}
+
+// Reports whether matmul on fp16 inputs should be computed in fp32, as chosen
+// by the environment variable TF_FP16_MATMUL_USE_FP32_COMPUTE (default: true).
+// Read errors are only logged; the flag is pre-set so the result is defined.
+bool MatmulDoFP32ComputationFP16Input() {
+  // Feedback from NVIDIA: native 16 bit floating point computation is absent
+  // from compute capability SM 5.2 and was introduced in SM 5.3 and higher.
+  // So, for compatibility, fp32 computation is the default for now.
+  // TODO(yangzihao): In the future, we need to return three possibilities:
+  // user-set-true, user-set-false, user-no-setting. In the calling sites,
+  // check the compatibilities. Note that user-set-false with compute
+  // capability <= 5.2 will cause an error in the later cublasGemmEx() call.
+  bool use_fp32 = true;
+  Status s =
+      ReadBoolFromEnvVar("TF_FP16_MATMUL_USE_FP32_COMPUTE", true, &use_fp32);
+  if (!s.ok()) LOG(ERROR) << s.error_message();
+  return use_fp32;
+}
+
+}  // namespace tensorflow
diff --git a/tensorflow/core/util/matmul_autotune.h b/tensorflow/core/util/matmul_autotune.h
new file mode 100644
index 0000000000..5366623883
--- /dev/null
+++ b/tensorflow/core/util/matmul_autotune.h
@@ -0,0 +1,28 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Utilities for querying matmul-autotune-related environment flags.
+
+#ifndef THIRD_PARTY_TENSORFLOW_CORE_UTIL_MATMUL_AUTOTUNE_H_
+#define THIRD_PARTY_TENSORFLOW_CORE_UTIL_MATMUL_AUTOTUNE_H_
+
+namespace tensorflow {
+// Whether TF_MATMUL_AUTOTUNE_ENABLE requests matmul autotuning (default off).
+bool MatmulAutotuneEnable();
+// Whether fp16-input matmul should use fp32 computation (default on).
+bool MatmulDoFP32ComputationFP16Input();
+}  // namespace tensorflow
+
+#endif  // THIRD_PARTY_TENSORFLOW_CORE_UTIL_MATMUL_AUTOTUNE_H_
author	Yangzihao Wang <yangzihao@google.com>	2017-07-18 16:48:30 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-07-18 16:52:54 -0700
commit	06acccabcb41513c76bbfffcd17817a7b136494b (patch)
tree	82c2b379a5d8d4aa6cbbb653d3020ee2504bbd58 /tensorflow/core/util
parent	11dff5b05b3488520d3a415173d73ae91fded092 (diff)