author    A. Unique TensorFlower <gardener@tensorflow.org>  2018-09-11 15:36:21 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>   2018-09-11 15:45:27 -0700
commit    6305a6d83552ba6a472cd72398b60d9241467f1f (patch)
tree      d02fb52415cefa562db46816c230e7522a702c90 /tensorflow/contrib
parent    b1f29d4c618d6bfa85130917848cd7eb89bf0f3b (diff)
Add an experimental API to allow half precision for FP32 calculation.
PiperOrigin-RevId: 212535448
Diffstat (limited to 'tensorflow/contrib')
-rw-r--r--  tensorflow/contrib/lite/c/c_api_internal.h                      |  5
-rw-r--r--  tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc       |  8
-rw-r--r--  tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc  | 19
-rw-r--r--  tensorflow/contrib/lite/interpreter.cc                          |  1
-rw-r--r--  tensorflow/contrib/lite/interpreter.h                           | 13
-rw-r--r--  tensorflow/contrib/lite/kernels/test_util.cc                    |  6
-rw-r--r--  tensorflow/contrib/lite/kernels/test_util.h                     |  3
-rw-r--r--  tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h              | 31
-rw-r--r--  tensorflow/contrib/lite/nnapi_delegate.cc                       |  5
9 files changed, 86 insertions, 5 deletions
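
With this change, callers opt in through the Interpreter before any delegate builds its model. A minimal usage sketch (the model file name and surrounding boilerplate are illustrative placeholders, not part of this commit):

#include "tensorflow/contrib/lite/interpreter.h"
#include "tensorflow/contrib/lite/kernels/register.h"
#include "tensorflow/contrib/lite/model.h"

// Build an interpreter, then allow FP32 ops to run at FP16 precision.
auto model = tflite::FlatBufferModel::BuildFromFile("model.tflite");
tflite::ops::builtin::BuiltinOpResolver resolver;
std::unique_ptr<tflite::Interpreter> interpreter;
tflite::InterpreterBuilder(*model, resolver)(&interpreter);
interpreter->SetAllowFp16PrecisionForFp32(true);  // experimental; default is false
interpreter->AllocateTensors();
interpreter->Invoke();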
diff --git a/tensorflow/contrib/lite/c/c_api_internal.h b/tensorflow/contrib/lite/c/c_api_internal.h
index 48df68a654..34c874d1d2 100644
--- a/tensorflow/contrib/lite/c/c_api_internal.h
+++ b/tensorflow/contrib/lite/c/c_api_internal.h
@@ -374,6 +374,11 @@ typedef struct TfLiteContext {
// WARNING: This is an experimental interface that is subject to change.
void (*SetExternalContext)(struct TfLiteContext*, TfLiteExternalContextType,
TfLiteExternalContext*);
+
+ // Flag for allowing float16 precision for FP32 calculation.
+ // default: false.
+ // WARNING: This is an experimental API and subject to change.
+ bool allow_fp32_relax_to_fp16;
} TfLiteContext;
typedef struct _TfLiteRegistration {
diff --git a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc
index e3eebac4da..c6587b3d3f 100644
--- a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc
@@ -1115,6 +1115,14 @@ class NNAPIDelegateKernel {
CHECK_NN(context, ANeuralNetworksModel_identifyInputsAndOutputs(
nn_model_.get(), inputs.size(), inputs.data(),
outputs.size(), outputs.data()));
+
+ // Set relaxed computation mode for fp32 if possible.
+ if (kAndroidSdkVersion >= kMinSdkVersionForNNAPI11) {
+ CHECK_NN(context,
+ ANeuralNetworksModel_relaxComputationFloat32toFloat16(
+ nn_model_.get(), context->allow_fp32_relax_to_fp16));
+ }
+
// Finalize the model
CHECK_NN(context, ANeuralNetworksModel_finish(nn_model_.get()));
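
For context, CHECK_NN here turns a non-zero NNAPI status into a TfLite error and bails out of the build. A rough approximation of the helper this file defines (shown for readability, not the verbatim macro):

#define CHECK_NN(context, code)                                               \
  if ((code) != ANEURALNETWORKS_NO_ERROR) {                                   \
    context->ReportError(context, "NN API returned error (%d).\n", (code));   \
    return kTfLiteError;                                                      \
  }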
diff --git a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc
index 4b01aefd6a..9626c54c74 100644
--- a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc
+++ b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc
@@ -40,13 +40,15 @@ class FloatAddOpModel : public SingleOpModelWithNNAPI {
public:
FloatAddOpModel(const TensorData& input1, const TensorData& input2,
const TensorData& output,
- ActivationFunctionType activation_type) {
+ ActivationFunctionType activation_type,
+ bool allow_fp32_relax_to_fp16 = false) {
input1_ = AddInput(input1);
input2_ = AddInput(input2);
output_ = AddOutput(output);
SetBuiltinOp(BuiltinOperator_ADD, BuiltinOptions_AddOptions,
CreateAddOptions(builder_, activation_type).Union());
- BuildInterpreter({GetShape(input1_), GetShape(input2_)});
+ BuildInterpreter({GetShape(input1_), GetShape(input2_)},
+ allow_fp32_relax_to_fp16);
}
int input1() { return input1_; }
@@ -71,6 +73,19 @@ TEST(NNAPIDelegate, AddWithNoActivation) {
EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.9, 0.4, 1.0, 1.3}));
}
+// Do a test with the NN API using no activation.
+// The test allows computing FP32 with FP16 precision. In this particular case,
+// calculating in FP32 or FP16 should produce the same results.
+TEST(NNAPIDelegate, AddWithNoActivationRelaxed) {
+ FloatAddOpModel m(
+ {TensorType_FLOAT32, {1, 2, 2, 1}}, {TensorType_FLOAT32, {1, 2, 2, 1}},
+ {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE, true);
+ m.PopulateTensor<float>(m.input1(), {-2.0, -1.0, 1.0, 2.0});
+ m.PopulateTensor<float>(m.input2(), {1.0, 2.0, 3.0, 4.0});
+ m.Invoke();
+ EXPECT_THAT(m.GetOutput(), ElementsAreArray({-1.0, 1.0, 4.0, 6.0}));
+}
+
// Do a test with the NN API with relu.
TEST(NNAPIDelegate, AddWithRelu) {
FloatAddOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}},
diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc
index 3f8f4d198f..2657bcd42b 100644
--- a/tensorflow/contrib/lite/interpreter.cc
+++ b/tensorflow/contrib/lite/interpreter.cc
@@ -123,6 +123,7 @@ Interpreter::Interpreter(ErrorReporter* error_reporter)
context_.AddTensors = AddTensors;
context_.tensors = nullptr;
context_.tensors_size = 0;
+ context_.allow_fp32_relax_to_fp16 = false;
context_.recommended_num_threads = -1;
context_.GetExternalContext = GetExternalContext;
context_.SetExternalContext = SetExternalContext;
diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h
index f0cd178c19..aa2bc4def6 100644
--- a/tensorflow/contrib/lite/interpreter.h
+++ b/tensorflow/contrib/lite/interpreter.h
@@ -336,6 +336,19 @@ class Interpreter {
// Set the number of threads available to the interpreter.
void SetNumThreads(int num_threads);
+ // Allow float16 precision for FP32 calculation when possible.
+ // default: false.
+ // WARNING: This is an experimental API and subject to change.
+ void SetAllowFp16PrecisionForFp32(bool allow) {
+ context_.allow_fp32_relax_to_fp16 = allow;
+ }
+
+ // Get the half precision flag.
+ // WARNING: This is an experimental API and subject to change.
+ bool GetAllowFp16PrecisionForFp32() const {
+ return context_.allow_fp32_relax_to_fp16;
+ }
+
// Allow a delegate to look at the graph and modify the graph to handle
// parts of the graph themselves. After this is called, the graph may
contain new nodes that replace one or more nodes.
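
Because the flag is stored on TfLiteContext, a delegate reads it while building its own model, so it must be set before the delegate is applied. A sketch of the intended ordering (the delegate variable is illustrative; any delegate, such as NNAPI, applies):

// 1. Set the experimental flag first.
interpreter->SetAllowFp16PrecisionForFp32(true);
// 2. The delegate picks up allow_fp32_relax_to_fp16 while it builds its model.
interpreter->ModifyGraphWithDelegate(delegate);
interpreter->AllocateTensors();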
diff --git a/tensorflow/contrib/lite/kernels/test_util.cc b/tensorflow/contrib/lite/kernels/test_util.cc
index 9156917140..0fdb0a3935 100644
--- a/tensorflow/contrib/lite/kernels/test_util.cc
+++ b/tensorflow/contrib/lite/kernels/test_util.cc
@@ -74,8 +74,8 @@ void SingleOpModel::SetCustomOp(
CustomOptionsFormat_FLEXBUFFERS));
}
-void SingleOpModel::BuildInterpreter(
- std::vector<std::vector<int>> input_shapes) {
+void SingleOpModel::BuildInterpreter(std::vector<std::vector<int>> input_shapes,
+ bool allow_fp32_relax_to_fp16) {
auto opcodes = builder_.CreateVector(opcodes_);
auto operators = builder_.CreateVector(operators_);
auto tensors = builder_.CreateVector(tensors_);
@@ -113,6 +113,8 @@ void SingleOpModel::BuildInterpreter(
CHECK(interpreter_->ResizeInputTensor(input_idx, shape) == kTfLiteOk);
}
+ interpreter_->SetAllowFp16PrecisionForFp32(allow_fp32_relax_to_fp16);
+
// Modify delegate with function.
if (apply_delegate_fn_) {
apply_delegate_fn_(interpreter_.get());
diff --git a/tensorflow/contrib/lite/kernels/test_util.h b/tensorflow/contrib/lite/kernels/test_util.h
index bedbe93ae6..84deb0e0e8 100644
--- a/tensorflow/contrib/lite/kernels/test_util.h
+++ b/tensorflow/contrib/lite/kernels/test_util.h
@@ -182,7 +182,8 @@ class SingleOpModel {
// Build the interpreter for this model. Also, resize and allocate all
// tensors given the shapes of the inputs.
- void BuildInterpreter(std::vector<std::vector<int>> input_shapes);
+ void BuildInterpreter(std::vector<std::vector<int>> input_shapes,
+ bool allow_fp32_relax_to_fp16 = false);
void Invoke();
diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
index 81dd459223..687944023b 100644
--- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
+++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h
@@ -364,6 +364,9 @@ typedef int (*ANeuralNetworksModel_identifyInputsAndOutputs_fn)(
ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs,
uint32_t outputCount, const uint32_t* outputs);
+typedef int (*ANeuralNetworksModel_relaxComputationFloat32toFloat16_fn)(
+ ANeuralNetworksModel* model, bool allow);
+
typedef int (*ANeuralNetworksExecution_create_fn)(
ANeuralNetworksCompilation* compilation,
ANeuralNetworksExecution** execution);
@@ -656,6 +659,34 @@ inline int ANeuralNetworksModel_identifyInputsAndOutputs(
}
/**
+ * Specifies whether {@link ANEURALNETWORKS_TENSOR_FLOAT32} is allowed to be
+ * calculated with range and/or precision as low as that of the IEEE 754 16-bit
+ * floating-point format. By default, {@link ANEURALNETWORKS_TENSOR_FLOAT32}
+ * must be calculated using at least the range and precision of the IEEE 754
+ * 32-bit floating-point format.
+ *
+ * @param model The model to be modified.
+ * @param allow 'true' indicates {@link ANEURALNETWORKS_TENSOR_FLOAT32} may be
+ * calculated with range and/or precision as low as that of the
+ * IEEE 754 16-bit floating point format. 'false' indicates
+ * {@link ANEURALNETWORKS_TENSOR_FLOAT32} must be calculated using
+ * at least the range and precision of the IEEE 754 32-bit floating
+ * point format.
+ *
+ * Attempting to modify a model once {@link ANeuralNetworksModel_finish} has
+ * been called will return an error.
+ *
+ * Available since API level 28.
+ *
+ * See {@link ANeuralNetworksModel} for information on multithreaded usage.
+ */
+inline int ANeuralNetworksModel_relaxComputationFloat32toFloat16(
+ ANeuralNetworksModel* model, bool allow) {
+ LOAD_FUNCTION(ANeuralNetworksModel_relaxComputationFloat32toFloat16);
+ EXECUTE_FUNCTION_RETURN(model, allow);
+}
+
+/**
* Create a {@link ANeuralNetworksCompilation} to compile the given model.
* This only creates the object. Compilation is only performed once
{@link ANeuralNetworksCompilation_finish} is invoked.
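
Because the shim resolves the symbol at runtime, the function can also be called directly when targeting an API level 28+ device. A hypothetical sketch (model construction elided; the point is that the relaxation call must precede ANeuralNetworksModel_finish):

ANeuralNetworksModel* model = nullptr;
ANeuralNetworksModel_create(&model);
// ... add operands and operations, identify inputs and outputs ...
if (ANeuralNetworksModel_relaxComputationFloat32toFloat16(model, /*allow=*/true) !=
    ANEURALNETWORKS_NO_ERROR) {
  // Handle the error; the call fails on models that are already finished.
}
ANeuralNetworksModel_finish(model);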
diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc
index 817486e898..698de3dd39 100644
--- a/tensorflow/contrib/lite/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/nnapi_delegate.cc
@@ -757,6 +757,11 @@ TfLiteStatus NNAPIDelegate::BuildGraph(Interpreter* interpreter) {
reinterpret_cast<const uint32_t*>(augmented_inputs.data()),
static_cast<uint32_t>(augmented_outputs.size()),
reinterpret_cast<const uint32_t*>(augmented_outputs.data())));
+
+ if (GetAndroidSdkVersionCached() >= 28) {
+ CHECK_NN(ANeuralNetworksModel_relaxComputationFloat32toFloat16(
+ nn_model_, interpreter->GetAllowFp16PrecisionForFp32()));
+ }
CHECK_NN(ANeuralNetworksModel_finish(nn_model_));
}
if (!nn_compiled_model_) {