author     2018-09-11 15:36:21 -0700
committer  2018-09-11 15:45:27 -0700
commit     6305a6d83552ba6a472cd72398b60d9241467f1f (patch)
tree       d02fb52415cefa562db46816c230e7522a702c90 /tensorflow/contrib
parent     b1f29d4c618d6bfa85130917848cd7eb89bf0f3b (diff)
Add an experimental API to allow half precision for FP32 calculation.
PiperOrigin-RevId: 212535448
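
Note: a minimal usage sketch, not part of the commit, showing how client code would opt in to the new relaxed-precision mode. It assumes the TFLite C++ headers as of this revision; "model.tflite" is a hypothetical path, and only SetAllowFp16PrecisionForFp32 is actually introduced here.

#include <memory>

#include "tensorflow/contrib/lite/interpreter.h"
#include "tensorflow/contrib/lite/kernels/register.h"
#include "tensorflow/contrib/lite/model.h"

int main() {
  // "model.tflite" is a placeholder used for illustration.
  auto model = tflite::FlatBufferModel::BuildFromFile("model.tflite");
  tflite::ops::builtin::BuiltinOpResolver resolver;
  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::InterpreterBuilder(*model, resolver)(&interpreter);

  // New in this commit: let FP32 ops run at FP16 range/precision where a
  // backend supports it. The NNAPI delegates read this flag at graph build.
  interpreter->SetAllowFp16PrecisionForFp32(true);

  interpreter->AllocateTensors();
  // ... populate input tensors, then:
  interpreter->Invoke();
  return 0;
}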
Diffstat (limited to 'tensorflow/contrib')
 tensorflow/contrib/lite/c/c_api_internal.h                     |  5
 tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc      |  8
 tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc | 19
 tensorflow/contrib/lite/interpreter.cc                         |  1
 tensorflow/contrib/lite/interpreter.h                          | 13
 tensorflow/contrib/lite/kernels/test_util.cc                   |  6
 tensorflow/contrib/lite/kernels/test_util.h                    |  3
 tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h             | 31
 tensorflow/contrib/lite/nnapi_delegate.cc                      |  5
9 files changed, 86 insertions, 5 deletions
diff --git a/tensorflow/contrib/lite/c/c_api_internal.h b/tensorflow/contrib/lite/c/c_api_internal.h
index 48df68a654..34c874d1d2 100644
--- a/tensorflow/contrib/lite/c/c_api_internal.h
+++ b/tensorflow/contrib/lite/c/c_api_internal.h
@@ -374,6 +374,11 @@ typedef struct TfLiteContext {
   // WARNING: This is an experimental interface that is subject to change.
   void (*SetExternalContext)(struct TfLiteContext*, TfLiteExternalContextType,
                              TfLiteExternalContext*);
+
+  // Flag for allowing float16 precision for FP32 calculation.
+  // default: false.
+  // WARNING: This is an experimental API and subject to change.
+  bool allow_fp32_relax_to_fp16;
 } TfLiteContext;
 
 typedef struct _TfLiteRegistration {
diff --git a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc
index e3eebac4da..c6587b3d3f 100644
--- a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc
@@ -1115,6 +1115,14 @@ class NNAPIDelegateKernel {
     CHECK_NN(context, ANeuralNetworksModel_identifyInputsAndOutputs(
                           nn_model_.get(), inputs.size(), inputs.data(),
                           outputs.size(), outputs.data()));
+
+    // Set relaxed computation mode for fp32 if possible.
+    if (kAndroidSdkVersion >= kMinSdkVersionForNNAPI11) {
+      CHECK_NN(context,
+               ANeuralNetworksModel_relaxComputationFloat32toFloat16(
+                   nn_model_.get(), context->allow_fp32_relax_to_fp16));
+    }
+
     // Finalize the model
     CHECK_NN(context, ANeuralNetworksModel_finish(nn_model_.get()));
 
diff --git a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc
index 4b01aefd6a..9626c54c74 100644
--- a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc
+++ b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc
@@ -40,13 +40,15 @@ class FloatAddOpModel : public SingleOpModelWithNNAPI {
  public:
   FloatAddOpModel(const TensorData& input1, const TensorData& input2,
                   const TensorData& output,
-                  ActivationFunctionType activation_type) {
+                  ActivationFunctionType activation_type,
+                  bool allow_fp32_relax_to_fp16 = false) {
     input1_ = AddInput(input1);
     input2_ = AddInput(input2);
     output_ = AddOutput(output);
     SetBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
                  CreateAddOptions(builder_, activation_type).Union());
-    BuildInterpreter({GetShape(input1_), GetShape(input2_)});
+    BuildInterpreter({GetShape(input1_), GetShape(input2_)},
+                     allow_fp32_relax_to_fp16);
   }
 
   int input1() { return input1_; }
@@ -71,6 +73,19 @@ TEST(NNAPIDelegate, AddWithNoActivation) {
   EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.9, 0.4, 1.0, 1.3}));
 }
 
+// Do a test with the NN API using no activation.
+// The test allows computing FP32 with FP16 precision. In this particular case,
+// calculating in FP32 or FP16 should produce the same results.
+TEST(NNAPIDelegate, AddWithNoActivationRelaxed) {
+  FloatAddOpModel m(
+      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
+      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE, true);
+  m.PopulateTensor<float>(m.input1(), {-2.0, -1.0, 1.0, 2.0});
+  m.PopulateTensor<float>(m.input2(), {1.0, 2.0, 3.0, 4.0});
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.0, 1.0, 4.0, 6.0}));
+}
+
 // Do a test with the NN api with relu.
 TEST(NNAPIDelegate, AddWithRelu) {
   FloatAddOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}},
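Note: the new AddWithNoActivationRelaxed test can assert exact equality only because every operand and every expected sum is exactly representable in IEEE 754 binary16. For values where FP16 rounding is visible, a tolerance-based variant would be needed; a sketch using the existing ArrayFloatNear helper from kernels/test_util.h follows. The inputs and the 4e-3 bound are illustrative assumptions, not part of this commit.

// Hypothetical variant: compare with a tolerance sized to FP16 rounding
// (machine epsilon 2^-10, about 9.8e-4, scaled by the output magnitudes).
TEST(NNAPIDelegate, AddWithNoActivationRelaxedNonExact) {
  FloatAddOpModel m(
      {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
      {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE,
      /*allow_fp32_relax_to_fp16=*/true);
  m.PopulateTensor<float>(m.input1(), {0.1, 0.2, 0.3, 0.4});
  m.PopulateTensor<float>(m.input2(), {0.1, 0.2, 0.3, 0.5});
  m.Invoke();
  EXPECT_THAT(m.GetOutput(),
              ElementsAreArray(ArrayFloatNear({0.2, 0.4, 0.6, 0.9}, 4e-3)));
}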
diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc
index 3f8f4d198f..2657bcd42b 100644
--- a/tensorflow/contrib/lite/interpreter.cc
+++ b/tensorflow/contrib/lite/interpreter.cc
@@ -123,6 +123,7 @@ Interpreter::Interpreter(ErrorReporter* error_reporter)
   context_.AddTensors = AddTensors;
   context_.tensors = nullptr;
   context_.tensors_size = 0;
+  context_.allow_fp32_relax_to_fp16 = false;
   context_.recommended_num_threads = -1;
   context_.GetExternalContext = GetExternalContext;
   context_.SetExternalContext = SetExternalContext;
diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h
index f0cd178c19..aa2bc4def6 100644
--- a/tensorflow/contrib/lite/interpreter.h
+++ b/tensorflow/contrib/lite/interpreter.h
@@ -336,6 +336,19 @@ class Interpreter {
   // Set the number of threads available to the interpreter.
   void SetNumThreads(int num_threads);
 
+  // Allow float16 precision for FP32 calculation when possible.
+  // default: not allow.
+  // WARNING: This is an experimental API and subject to change.
+  void SetAllowFp16PrecisionForFp32(bool allow) {
+    context_.allow_fp32_relax_to_fp16 = allow;
+  }
+
+  // Get the half precision flag.
+  // WARNING: This is an experimental API and subject to change.
+  bool GetAllowFp16PrecisionForFp32() const {
+    return context_.allow_fp32_relax_to_fp16;
+  }
+
   // Allow a delegate to look at the graph and modify the graph to handle
   // parts of the graph themselves. After this is called, the graph may
   // contain new nodes that replace 1 more nodes.
diff --git a/tensorflow/contrib/lite/kernels/test_util.cc b/tensorflow/contrib/lite/kernels/test_util.cc
index 9156917140..0fdb0a3935 100644
--- a/tensorflow/contrib/lite/kernels/test_util.cc
+++ b/tensorflow/contrib/lite/kernels/test_util.cc
@@ -74,8 +74,8 @@ void SingleOpModel::SetCustomOp(
                             CustomOptionsFormat_FLEXBUFFERS));
 }
 
-void SingleOpModel::BuildInterpreter(
-    std::vector<std::vector<int>> input_shapes) {
+void SingleOpModel::BuildInterpreter(std::vector<std::vector<int>> input_shapes,
+                                     bool allow_fp32_relax_to_fp16) {
   auto opcodes = builder_.CreateVector(opcodes_);
   auto operators = builder_.CreateVector(operators_);
   auto tensors = builder_.CreateVector(tensors_);
@@ -113,6 +113,8 @@ void SingleOpModel::BuildInterpreter(
     CHECK(interpreter_->ResizeInputTensor(input_idx, shape) == kTfLiteOk);
   }
 
+  interpreter_->SetAllowFp16PrecisionForFp32(allow_fp32_relax_to_fp16);
+
   // Modify delegate with function.
   if (apply_delegate_fn_) {
     apply_delegate_fn_(interpreter_.get());
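Note: this commit only plumbs allow_fp32_relax_to_fp16 from the Interpreter setter above into TfLiteContext and on to the NNAPI delegates; no CPU kernel consults it. A hypothetical backend could branch on the flag as sketched below; both Eval helpers are made-up placeholders.

#include "tensorflow/contrib/lite/c/c_api_internal.h"

namespace {

// Placeholder: full-precision FP32 math.
TfLiteStatus EvalFullFp32(TfLiteContext* context, TfLiteNode* node) {
  return kTfLiteOk;
}

// Placeholder: math that may round intermediates to binary16.
TfLiteStatus EvalReducedPrecision(TfLiteContext* context, TfLiteNode* node) {
  return kTfLiteOk;
}

// Hypothetical kernel entry point reading the new context flag.
TfLiteStatus MyOpEval(TfLiteContext* context, TfLiteNode* node) {
  return context->allow_fp32_relax_to_fp16
             ? EvalReducedPrecision(context, node)
             : EvalFullFp32(context, node);
}

}  // namespace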
diff --git a/tensorflow/contrib/lite/kernels/test_util.h b/tensorflow/contrib/lite/kernels/test_util.h
index bedbe93ae6..84deb0e0e8 100644
--- a/tensorflow/contrib/lite/kernels/test_util.h
+++ b/tensorflow/contrib/lite/kernels/test_util.h
@@ -182,7 +182,8 @@ class SingleOpModel {
 
   // Build the interpreter for this model. Also, resize and allocate all
   // tensors given the shapes of the inputs.
-  void BuildInterpreter(std::vector<std::vector<int>> input_shapes);
+  void BuildInterpreter(std::vector<std::vector<int>> input_shapes,
+                        bool allow_fp32_relax_to_fp16 = false);
 
   void Invoke();
 
diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
index 81dd459223..687944023b 100644
--- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
+++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
@@ -364,6 +364,9 @@ typedef int (*ANeuralNetworksModel_identifyInputsAndOutputs_fn)(
     ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs,
     uint32_t outputCount, const uint32_t* outputs);
 
+typedef int (*ANeuralNetworksModel_relaxComputationFloat32toFloat16_fn)(
+    ANeuralNetworksModel* model, bool allow);
+
 typedef int (*ANeuralNetworksExecution_create_fn)(
     ANeuralNetworksCompilation* compilation,
     ANeuralNetworksExecution** execution);
@@ -656,6 +659,34 @@ inline int ANeuralNetworksModel_identifyInputsAndOutputs(
 }
 
 /**
+ * Specifies whether {@link ANEURALNETWORKS_TENSOR_FLOAT32} is allowed to be
+ * calculated with range and/or precision as low as that of the IEEE 754 16-bit
+ * floating-point format. By default, {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * must be calculated using at least the range and precision of the IEEE 754
+ * 32-bit floating-point format.
+ *
+ * @param model The model to be modified.
+ * @param allow 'true' indicates {@link ANEURALNETWORKS_TENSOR_FLOAT32} may be
+ *              calculated with range and/or precision as low as that of the
+ *              IEEE 754 16-bit floating point format. 'false' indicates
+ *              {@link ANEURALNETWORKS_TENSOR_FLOAT32} must be calculated using
+ *              at least the range and precision of the IEEE 754 32-bit floating
+ *              point format.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
+ * been called will return an error.
+ *
+ * Available since API level 28.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ */
+inline int ANeuralNetworksModel_relaxComputationFloat32toFloat16(
+    ANeuralNetworksModel* model, bool allow) {
+  LOAD_FUNCTION(ANeuralNetworksModel_relaxComputationFloat32toFloat16);
+  EXECUTE_FUNCTION_RETURN(model, allow);
+}
+
+/**
  * Create a {@link ANeuralNetworksCompilation} to compile the given model.
  * This only creates the object. Compilation is only performed once
  * {@link ANeuralNetworksCompilation_start} is invoked.
diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc
index 817486e898..698de3dd39 100644
--- a/tensorflow/contrib/lite/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/nnapi_delegate.cc
@@ -757,6 +757,11 @@ TfLiteStatus NNAPIDelegate::BuildGraph(Interpreter* interpreter) {
         reinterpret_cast<const uint32_t*>(augmented_inputs.data()),
         static_cast<uint32_t>(augmented_outputs.size()),
         reinterpret_cast<const uint32_t*>(augmented_outputs.data())));
+
+    if (GetAndroidSdkVersionCached() >= 28) {
+      CHECK_NN(ANeuralNetworksModel_relaxComputationFloat32toFloat16(
+          nn_model_, interpreter->GetAllowFp16PrecisionForFp32()));
+    }
     CHECK_NN(ANeuralNetworksModel_finish(nn_model_));
   }
   if (!nn_compiled_model_) {
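Note: for reference, a sketch of the NNAPI-side call sequence the two delegates now perform, written against the shim above. Model construction is elided; only the new relaxation step is shown in full. BuildRelaxedModel is an illustrative name, not an API in this commit.

#include "tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h"

int BuildRelaxedModel(ANeuralNetworksModel** out_model, bool allow_fp16) {
  ANeuralNetworksModel* model = nullptr;
  int status = ANeuralNetworksModel_create(&model);
  if (status != ANEURALNETWORKS_NO_ERROR) return status;

  // ... add operands and operations, identify inputs/outputs ...

  // New: must precede ANeuralNetworksModel_finish, and is only available
  // on API level 28+ (NNAPI 1.1), which both delegates now guard for.
  status =
      ANeuralNetworksModel_relaxComputationFloat32toFloat16(model, allow_fp16);
  if (status != ANEURALNETWORKS_NO_ERROR) return status;

  status = ANeuralNetworksModel_finish(model);
  if (status != ANEURALNETWORKS_NO_ERROR) return status;

  *out_model = model;
  return ANEURALNETWORKS_NO_ERROR;
}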