author     2018-09-11 15:36:21 -0700
committer  2018-09-11 15:45:27 -0700
commit     6305a6d83552ba6a472cd72398b60d9241467f1f (patch)
tree       d02fb52415cefa562db46816c230e7522a702c90 /tensorflow/contrib
parent     b1f29d4c618d6bfa85130917848cd7eb89bf0f3b (diff)
Add an experimental API to allow half precision for FP32 calculation.
PiperOrigin-RevId: 212535448
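
Note: a minimal usage sketch, not part of the commit, showing how client code would opt in to the new relaxed-precision mode. It assumes the TFLite C++ headers as of this revision; "model.tflite" is a hypothetical path, and only SetAllowFp16PrecisionForFp32 is actually introduced here.

#include <memory>

#include "tensorflow/contrib/lite/interpreter.h"
#include "tensorflow/contrib/lite/kernels/register.h"
#include "tensorflow/contrib/lite/model.h"

int main() {
  // "model.tflite" is a placeholder used for illustration.
  auto model = tflite::FlatBufferModel::BuildFromFile("model.tflite");
  tflite::ops::builtin::BuiltinOpResolver resolver;
  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::InterpreterBuilder(*model, resolver)(&interpreter);

  // New in this commit: let FP32 ops run at FP16 range/precision where a
  // backend supports it. The NNAPI delegates read this flag at graph build.
  interpreter->SetAllowFp16PrecisionForFp32(true);

  interpreter->AllocateTensors();
  // ... populate input tensors, then:
  interpreter->Invoke();
  return 0;
}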
Diffstat (limited to 'tensorflow/contrib')
 tensorflow/contrib/lite/c/c_api_internal.h                     |  5
 tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc      |  8
 tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc | 19
 tensorflow/contrib/lite/interpreter.cc                         |  1
 tensorflow/contrib/lite/interpreter.h                          | 13
 tensorflow/contrib/lite/kernels/test_util.cc                   |  6
 tensorflow/contrib/lite/kernels/test_util.h                    |  3
 tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h             | 31
 tensorflow/contrib/lite/nnapi_delegate.cc                      |  5
9 files changed, 86 insertions, 5 deletions
diff --git a/tensorflow/contrib/lite/c/c_api_internal.h b/tensorflow/contrib/lite/c/c_api_internal.h
index 48df68a654..34c874d1d2 100644
--- a/tensorflow/contrib/lite/c/c_api_internal.h
+++ b/tensorflow/contrib/lite/c/c_api_internal.h
@@ -374,6 +374,11 @@ typedef struct TfLiteContext {
   // WARNING: This is an experimental interface that is subject to change.
   void (*SetExternalContext)(struct TfLiteContext*, TfLiteExternalContextType,
                              TfLiteExternalContext*);
+
+  // Flag for allowing float16 precision for FP32 calculation.
+  // default: false.
+  // WARNING: This is an experimental API and subject to change.
+  bool allow_fp32_relax_to_fp16;
 } TfLiteContext;
 
 typedef struct _TfLiteRegistration {
diff --git a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc
index e3eebac4da..c6587b3d3f 100644
--- a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc
@@ -1115,6 +1115,14 @@ class NNAPIDelegateKernel {
     CHECK_NN(context, ANeuralNetworksModel_identifyInputsAndOutputs(
                           nn_model_.get(), inputs.size(), inputs.data(),
                           outputs.size(), outputs.data()));
+
+    // Set relaxed computation mode for fp32 if possible.
+    if (kAndroidSdkVersion >= kMinSdkVersionForNNAPI11) {
+      CHECK_NN(context,
+               ANeuralNetworksModel_relaxComputationFloat32toFloat16(
+                   nn_model_.get(), context->allow_fp32_relax_to_fp16));
+    }
+
     // Finalize the model
     CHECK_NN(context, ANeuralNetworksModel_finish(nn_model_.get()));
 
diff --git a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc
index 4b01aefd6a..9626c54c74 100644
--- a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc
+++ b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc
@@ -40,13 +40,15 @@ class FloatAddOpModel : public SingleOpModelWithNNAPI {
  public:
   FloatAddOpModel(const TensorData& input1, const TensorData& input2,
                   const TensorData& output,
-                  ActivationFunctionType activation_type) {
+                  ActivationFunctionType activation_type,
+                  bool allow_fp32_relax_to_fp16 = false) {
     input1_ = AddInput(input1);
     input2_ = AddInput(input2);
     output_ = AddOutput(output);
     SetBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
                  CreateAddOptions(builder_, activation_type).Union());
-    BuildInterpreter({GetShape(input1_), GetShape(input2_)});
+    BuildInterpreter({GetShape(input1_), GetShape(input2_)},
+                     allow_fp32_relax_to_fp16);
   }
 
   int input1() { return input1_; }
@@ -71,6 +73,19 @@ TEST(NNAPIDelegate, AddWithNoActivation) {
   EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.9, 0.4, 1.0, 1.3}));
 }
 
+// Do a test with the NN API using no activation.
+// The test allows computing FP32 with FP16 precision. In this particular case,
+// calculating in FP32 or FP16 should produce the same results.
+TEST(NNAPIDelegate, AddWithNoActivationRelaxed) {
+  FloatAddOpModel m(
+      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
+      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE, true);
+  m.PopulateTensor<float>(m.input1(), {-2.0, -1.0, 1.0, 2.0});
+  m.PopulateTensor<float>(m.input2(), {1.0, 2.0, 3.0, 4.0});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.0, 1.0, 4.0, 6.0}));
+}
+
 // Do a test with the NN api with relu.
 TEST(NNAPIDelegate, AddWithRelu) {
   FloatAddOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}},
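Note: the new AddWithNoActivationRelaxed test can assert exact equality only because every operand and every expected sum is exactly representable in IEEE 754 binary16. For values where FP16 rounding is visible, a tolerance-based variant would be needed; a sketch using the existing ArrayFloatNear helper from kernels/test_util.h follows. The inputs and the 4e-3 bound are illustrative assumptions, not part of this commit.

// Hypothetical variant: compare with a tolerance sized to FP16 rounding
// (machine epsilon 2^-10, about 9.8e-4, scaled by the output magnitudes).
TEST(NNAPIDelegate, AddWithNoActivationRelaxedNonExact) {
  FloatAddOpModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE,
      /*allow_fp32_relax_to_fp16=*/true);
  m.PopulateTensor<float>(m.input1(), {0.1, 0.2, 0.3, 0.4});
  m.PopulateTensor<float>(m.input2(), {0.1, 0.2, 0.3, 0.5});
  m.Invoke();
  EXPECT_THAT(m.GetOutput(),
              ElementsAreArray(ArrayFloatNear({0.2, 0.4, 0.6, 0.9}, 4e-3)));
}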
diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc
index 3f8f4d198f..2657bcd42b 100644
--- a/tensorflow/contrib/lite/interpreter.cc
+++ b/tensorflow/contrib/lite/interpreter.cc
@@ -123,6 +123,7 @@ Interpreter::Interpreter(ErrorReporter* error_reporter)
   context_.AddTensors = AddTensors;
   context_.tensors = nullptr;
   context_.tensors_size = 0;
+  context_.allow_fp32_relax_to_fp16 = false;
   context_.recommended_num_threads = -1;
   context_.GetExternalContext = GetExternalContext;
   context_.SetExternalContext = SetExternalContext;
diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h
index f0cd178c19..aa2bc4def6 100644
--- a/tensorflow/contrib/lite/interpreter.h
+++ b/tensorflow/contrib/lite/interpreter.h
@@ -336,6 +336,19 @@ class Interpreter {
   // Set the number of threads available to the interpreter.
   void SetNumThreads(int num_threads);
 
+  // Allow float16 precision for FP32 calculation when possible.
+  // default: not allow.
+  // WARNING: This is an experimental API and subject to change.
+  void SetAllowFp16PrecisionForFp32(bool allow) {
+    context_.allow_fp32_relax_to_fp16 = allow;
+  }
+
+  // Get the half precision flag.
+  // WARNING: This is an experimental API and subject to change.
+  bool GetAllowFp16PrecisionForFp32() const {
+    return context_.allow_fp32_relax_to_fp16;
+  }
+
   // Allow a delegate to look at the graph and modify the graph to handle
   // parts of the graph themselves. After this is called, the graph may
   // contain new nodes that replace 1 more nodes.
diff --git a/tensorflow/contrib/lite/kernels/test_util.cc b/tensorflow/contrib/lite/kernels/test_util.cc
index 9156917140..0fdb0a3935 100644
--- a/tensorflow/contrib/lite/kernels/test_util.cc
+++ b/tensorflow/contrib/lite/kernels/test_util.cc
@@ -74,8 +74,8 @@ void SingleOpModel::SetCustomOp(
                             CustomOptionsFormat_FLEXBUFFERS));
 }
 
-void SingleOpModel::BuildInterpreter(
-    std::vector<std::vector<int>> input_shapes) {
+void SingleOpModel::BuildInterpreter(std::vector<std::vector<int>> input_shapes,
+                                     bool allow_fp32_relax_to_fp16) {
   auto opcodes = builder_.CreateVector(opcodes_);
   auto operators = builder_.CreateVector(operators_);
   auto tensors = builder_.CreateVector(tensors_);
@@ -113,6 +113,8 @@ void SingleOpModel::BuildInterpreter(
     CHECK(interpreter_->ResizeInputTensor(input_idx, shape) == kTfLiteOk);
   }
 
+  interpreter_->SetAllowFp16PrecisionForFp32(allow_fp32_relax_to_fp16);
+
   // Modify delegate with function.
   if (apply_delegate_fn_) {
     apply_delegate_fn_(interpreter_.get());
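Note: this commit only plumbs allow_fp32_relax_to_fp16 from the Interpreter setter above into TfLiteContext and on to the NNAPI delegates; no CPU kernel consults it. A hypothetical backend could branch on the flag as sketched below; both Eval helpers are made-up placeholders.

#include "tensorflow/contrib/lite/c/c_api_internal.h"

namespace {

// Placeholder: full-precision FP32 math.
TfLiteStatus EvalFullFp32(TfLiteContext* context, TfLiteNode* node) {
  return kTfLiteOk;
}

// Placeholder: math that may round intermediates to binary16.
TfLiteStatus EvalReducedPrecision(TfLiteContext* context, TfLiteNode* node) {
  return kTfLiteOk;
}

// Hypothetical kernel entry point reading the new context flag.
TfLiteStatus MyOpEval(TfLiteContext* context, TfLiteNode* node) {
  return context->allow_fp32_relax_to_fp16
             ? EvalReducedPrecision(context, node)
             : EvalFullFp32(context, node);
}

}  // namespace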
diff --git a/tensorflow/contrib/lite/kernels/test_util.h b/tensorflow/contrib/lite/kernels/test_util.h
index bedbe93ae6..84deb0e0e8 100644
--- a/tensorflow/contrib/lite/kernels/test_util.h
+++ b/tensorflow/contrib/lite/kernels/test_util.h
@@ -182,7 +182,8 @@ class SingleOpModel {
 
   // Build the interpreter for this model. Also, resize and allocate all
   // tensors given the shapes of the inputs.
-  void BuildInterpreter(std::vector<std::vector<int>> input_shapes);
+  void BuildInterpreter(std::vector<std::vector<int>> input_shapes,
+                        bool allow_fp32_relax_to_fp16 = false);
 
   void Invoke();
 
diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
index 81dd459223..687944023b 100644
--- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
+++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
@@ -364,6 +364,9 @@ typedef int (*ANeuralNetworksModel_identifyInputsAndOutputs_fn)(
     ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs,
     uint32_t outputCount, const uint32_t* outputs);
 
+typedef int (*ANeuralNetworksModel_relaxComputationFloat32toFloat16_fn)(
+    ANeuralNetworksModel* model, bool allow);
+
 typedef int (*ANeuralNetworksExecution_create_fn)(
     ANeuralNetworksCompilation* compilation,
     ANeuralNetworksExecution** execution);
@@ -656,6 +659,34 @@ inline int ANeuralNetworksModel_identifyInputsAndOutputs(
 }
 
 /**
+ * Specifies whether {@link ANEURALNETWORKS_TENSOR_FLOAT32} is allowed to be
+ * calculated with range and/or precision as low as that of the IEEE 754 16-bit
+ * floating-point format. By default, {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * must be calculated using at least the range and precision of the IEEE 754
+ * 32-bit floating-point format.
+ *
+ * @param model The model to be modified.
+ * @param allow 'true' indicates {@link ANEURALNETWORKS_TENSOR_FLOAT32} may be
+ *              calculated with range and/or precision as low as that of the
+ *              IEEE 754 16-bit floating point format. 'false' indicates
+ *              {@link ANEURALNETWORKS_TENSOR_FLOAT32} must be calculated using
+ *              at least the range and precision of the IEEE 754 32-bit floating
+ *              point format.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
+ * been called will return an error.
+ *
+ * Available since API level 28.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ */
+inline int ANeuralNetworksModel_relaxComputationFloat32toFloat16(
+    ANeuralNetworksModel* model, bool allow) {
+  LOAD_FUNCTION(ANeuralNetworksModel_relaxComputationFloat32toFloat16);
+  EXECUTE_FUNCTION_RETURN(model, allow);
+}
+
+/**
  * Create a {@link ANeuralNetworksCompilation} to compile the given model.
  * This only creates the object. Compilation is only performed once
  * {@link ANeuralNetworksCompilation_start} is invoked.
diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc
index 817486e898..698de3dd39 100644
--- a/tensorflow/contrib/lite/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/nnapi_delegate.cc
@@ -757,6 +757,11 @@ TfLiteStatus NNAPIDelegate::BuildGraph(Interpreter* interpreter) {
         reinterpret_cast<const uint32_t*>(augmented_inputs.data()),
         static_cast<uint32_t>(augmented_outputs.size()),
         reinterpret_cast<const uint32_t*>(augmented_outputs.data())));
+
+    if (GetAndroidSdkVersionCached() >= 28) {
+      CHECK_NN(ANeuralNetworksModel_relaxComputationFloat32toFloat16(
+          nn_model_, interpreter->GetAllowFp16PrecisionForFp32()));
+    }
     CHECK_NN(ANeuralNetworksModel_finish(nn_model_));
   }
   if (!nn_compiled_model_) {
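Note: for reference, a sketch of the NNAPI-side call sequence the two delegates now perform, written against the shim above. Model construction is elided; only the new relaxation step is shown in full. BuildRelaxedModel is an illustrative name, not an API in this commit.

#include "tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h"

int BuildRelaxedModel(ANeuralNetworksModel** out_model, bool allow_fp16) {
  ANeuralNetworksModel* model = nullptr;
  int status = ANeuralNetworksModel_create(&model);
  if (status != ANEURALNETWORKS_NO_ERROR) return status;

  // ... add operands and operations, identify inputs/outputs ...

  // New: must precede ANeuralNetworksModel_finish, and is only available
  // on API level 28+ (NNAPI 1.1), which both delegates now guard for.
  status =
      ANeuralNetworksModel_relaxComputationFloat32toFloat16(model, allow_fp16);
  if (status != ANEURALNETWORKS_NO_ERROR) return status;

  status = ANeuralNetworksModel_finish(model);
  if (status != ANEURALNETWORKS_NO_ERROR) return status;

  *out_model = model;
  return ANEURALNETWORKS_NO_ERROR;
}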