Add new env-var TF_FP16_CONV_MODE.

This env-var selects the internal compute type used for cuDNN convolutions when the input data type is float16. In ACCURATE mode (the default), float32 is always used as the internal compute type; in FAST mode, both float16 and float32 internal compute types are included in auto-tuning, which picks whichever runs faster. PiperOrigin-RevId: 174495814
author: Yangzihao Wang <yangzihao@google.com> 2017-11-03 12:14:01 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-11-03 12:17:46 -0700
commit: 832ffc71a2d4182a49a2353ff125f2624bd52f0f (patch)
tree: 89b2bd421e10025f760f6c682f93a2fe83adbe94
parent: 509d0f2ca7f988d294d7234d31fac6a1cedcc39b (diff)
4 files changed, 55 insertions, 8 deletions
diff --git a/tensorflow/core/util/env_var.cc b/tensorflow/core/util/env_var.cc
index d4e89b966e..c844850179 100644
--- a/tensorflow/core/util/env_var.cc
+++ b/tensorflow/core/util/env_var.cc
@@ -60,4 +60,15 @@ Status ReadInt64FromEnvVar(StringPiece env_var_name, int64 default_val,
       tf_env_var_val, ". Use the default value: ", default_val));
 }
 
+Status ReadStringFromEnvVar(StringPiece env_var_name, StringPiece default_val,
+                            string* value) {  // Always returns Status::OK().
+  const char* tf_env_var_val = getenv(env_var_name.ToString().c_str());
+  if (tf_env_var_val != nullptr) {  // Set (even to ""): copied verbatim.
+    *value = tf_env_var_val;
+  } else {  // Unset: fall back to the caller-supplied default.
+    *value = default_val.ToString();
+  }
+  return Status::OK();  // No parsing or validation, so no error path.
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/util/env_var.h b/tensorflow/core/util/env_var.h
index ec661f1d81..47f9ff3a3b 100644
--- a/tensorflow/core/util/env_var.h
+++ b/tensorflow/core/util/env_var.h
@@ -21,20 +21,25 @@ limitations under the License.
 
 namespace tensorflow {
 
-// Return a boolean into "value" from the environmental variable "env_var_name".
-// If it is unset, the default value is used.
-// A string "0" or a case insensitive "false" is interpreted as false.
-// A string "1" or a case insensitive "true" is interpreted as true.
-// Otherwise, an error status is returned.
+// Returns a boolean into "value" from the environmental variable
+// "env_var_name". If it is unset, the default value is used. A string "0" or a
+// case insensitive "false" is interpreted as false. A string "1" or a case
+// insensitive "true" is interpreted as true. Otherwise, an error status is
+// returned.
 Status ReadBoolFromEnvVar(StringPiece env_var_name, bool default_val,
                           bool* value);
 
-// Return an int64 into "value" from the environmental variable "env_var_name".
+// Returns an int64 into "value" from the environmental variable "env_var_name".
 // If it is unset, the default value is used.
 // If the string cannot be parsed into int64, an error status is returned.
 Status ReadInt64FromEnvVar(StringPiece env_var_name, int64 default_val,
                            int64* value);
 
+// Returns a string into "value" from the environmental variable "env_var_name".
+// If it is unset, the default value is used.
+Status ReadStringFromEnvVar(StringPiece env_var_name, StringPiece default_val,
+                            string* value);
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_UTIL_ENV_VAR_H_
diff --git a/tensorflow/core/util/use_cudnn.cc b/tensorflow/core/util/use_cudnn.cc
index 3862f01ea1..d7d03f151e 100644
--- a/tensorflow/core/util/use_cudnn.cc
+++ b/tensorflow/core/util/use_cudnn.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include "tensorflow/core/util/use_cudnn.h"
 
 #include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/platform/types.h"
 #include "tensorflow/core/util/env_var.h"
 
@@ -26,7 +27,7 @@ namespace tensorflow {
     bool value;                                                            \
     Status status = ReadBoolFromEnvVar(#flag_name, default_value, &value); \
     if (!status.ok()) {                                                    \
-      LOG(ERROR) << status.error_message();                                \
+      LOG(ERROR) << status;                                                \
     }                                                                      \
     return value;                                                          \
   }
@@ -37,4 +38,24 @@ ADD_CUDNN_FLAG(CudnnDisableConv1x1Optimization,
                TF_CUDNN_DISABLE_CONV_1X1_OPTIMIZATION, false);
 
 #undef ADD_CUDNN_FLAG
+
+FP16ConvMode CudnnConvComputeMode() {  // Reads TF_FP16_CONV_MODE on each call.
+  string value;
+  Status status = ReadStringFromEnvVar("TF_FP16_CONV_MODE", "accurate", &value);
+  if (!status.ok()) {  // Logged only; the mode is still derived from `value`.
+    LOG(ERROR) << status;
+  }
+  string lowercase_value = str_util::Lowercase(value);  // Compare lowercased.
+  if (lowercase_value == "accurate") {
+    return FP16ConvMode::kAccurate;
+  } else if (lowercase_value == "fast") {
+    return FP16ConvMode::kFast;
+  } else {  // Unrecognized value: log it exactly as the user spelled it.
+    LOG(ERROR) << "FP16ConvMode only supports two modes, ACCURATE and FAST. "
+                  "Got unknown mode: "
+               << value;
+  }
+  return FP16ConvMode::kAccurate;  // Fallback after logging an unknown mode.
+}
+
 }  // namespace tensorflow
diff --git a/tensorflow/core/util/use_cudnn.h b/tensorflow/core/util/use_cudnn.h
index 5c7d706496..a39a032e3f 100644
--- a/tensorflow/core/util/use_cudnn.h
+++ b/tensorflow/core/util/use_cudnn.h
@@ -13,16 +13,26 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-// The utility to check whether we have Cudnn dependency.
+// The utility to check Cudnn dependency and set Cudnn-related flags.
 
 #ifndef TENSORFLOW_UTIL_USE_CUDNN_H_
 #define TENSORFLOW_UTIL_USE_CUDNN_H_
 
 namespace tensorflow {
 
+// FP16ConvMode: The mode to set the internal compute type for cudnn convolution
+// when the input data type is float16. Two types of modes are supported:
+//   kAccurate: Always use float32 as the internal compute type.
+//   kFast: Include both float32 and float16 compute type in the autotune.
+enum class FP16ConvMode {
+  kAccurate = 1,  // Default: used when the env-var is unset or unknown.
+  kFast = 2,      // Used when TF_FP16_CONV_MODE lowercases to "fast".
+};
+
 bool CanUseCudnn();
 bool CudnnUseAutotune();
 bool CudnnDisableConv1x1Optimization();
+FP16ConvMode CudnnConvComputeMode();
 
 }  // namespace tensorflow
author	Yangzihao Wang <yangzihao@google.com>	2017-11-03 12:14:01 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-11-03 12:17:46 -0700
commit	832ffc71a2d4182a49a2353ff125f2624bd52f0f (patch)
tree	89b2bd421e10025f760f6c682f93a2fe83adbe94
parent	509d0f2ca7f988d294d7234d31fac6a1cedcc39b (diff)