author     A. Unique TensorFlower <gardener@tensorflow.org>    2018-06-18 17:34:45 -0700
committer  TensorFlower Gardener <gardener@tensorflow.org>     2018-06-18 17:39:17 -0700
commit     3c0c74e0147ef284a6f2cc5533bea8777af1e740 (patch)
tree       69f0ecfd75cfcfade58121ca619dc400b4f18364 /tensorflow/contrib/lite/delegates/nnapi
parent     aecd8fecf17e8b5215372e92147846b474936f3f (diff)
Make NNAPI delegation support more ops: MUL, MAX_POOL_2D, L2_POOL_2D, CONV_2D,
DEPTHWISE_CONV_2D, FULLY_CONNECTED, SOFTMAX, and RESHAPE. Also dispatch on the
registered op version, skip constant (kTfLiteMmapRo) tensors when binding NNAPI
execution inputs, and only enable the delegate on Android API level 27 (O-MR1)
or higher.
PiperOrigin-RevId: 201090056
Diffstat (limited to 'tensorflow/contrib/lite/delegates/nnapi')
-rw-r--r--  tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc       253
-rw-r--r--  tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc  533
2 files changed, 745 insertions, 41 deletions
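
For context, applying this delegate from application code looks roughly like the
sketch below, mirroring how the new unit tests install it via
Interpreter::ModifyGraphWithDelegate(NnApiDelegate()). The include paths, the
placeholder model file name, and the omitted per-call error checking are
illustrative assumptions, not part of this change.

#include <memory>

#include "tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/contrib/lite/interpreter.h"
#include "tensorflow/contrib/lite/kernels/register.h"
#include "tensorflow/contrib/lite/model.h"

// Minimal sketch: build an interpreter and let NNAPI take the supported nodes.
// "model.tflite" is a placeholder; a real caller would check every status.
TfLiteStatus RunWithNnApi() {
  auto model = tflite::FlatBufferModel::BuildFromFile("model.tflite");
  tflite::ops::builtin::BuiltinOpResolver resolver;
  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::InterpreterBuilder(*model, resolver)(&interpreter);
  // Partitions the graph: nodes whose builtin_code/version pass Map() run on
  // NNAPI; everything else stays on the TFLite CPU kernels. Below API 27 the
  // delegate leaves the graph untouched.
  interpreter->ModifyGraphWithDelegate(tflite::NnApiDelegate());
  interpreter->AllocateTensors();
  return interpreter->Invoke();
}
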
diff --git a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc
index 0731d14419..e96ee92376 100644
--- a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc
@@ -26,6 +26,10 @@ limitations under the License.
#include "tensorflow/contrib/lite/kernels/kernel_util.h"
#include "tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h"
+#ifdef __ANDROID__
+#include <sys/system_properties.h>
+#endif
+
namespace tflite {
namespace {
@@ -37,6 +41,29 @@ namespace {
return kTfLiteError; \
}
+namespace {
+int32_t GetAndroidSdkVersion() {
+#ifdef __ANDROID__
+ const char* sdkProp = "ro.build.version.sdk";
+ char sdkVersion[PROP_VALUE_MAX];
+ int length = __system_property_get(sdkProp, sdkVersion);
+ if (length != 0) {
+ for (int i = 0; i < length; ++i) {
+ int digit = sdkVersion[i] - '0';
+ if (digit < 0 || digit > 9) {
+ // Non-numeric SDK version, assume it's higher than expected.
+ return std::numeric_limits<int32_t>::max();
+ }
+ }
+ return atoi(sdkVersion);
+ }
+#endif // __ANDROID__
+ return 0;
+}
+
+static const int32_t kAndroidSdkVersion = GetAndroidSdkVersion();
+} // namespace
+
// RAII NN API Model Destructor for use with std::unique_ptr
struct NNFreeModel {
void operator()(ANeuralNetworksModel* model) {
@@ -71,7 +98,7 @@ class OperandMapping {
// Add a new mapping from `tflite_index` and return the NN API tensor index.
int add_new_ann_tensor_index(int tflite_index) {
if (tflite_index >= lite_tensor_to_ann_tensor_.size()) {
- lite_tensor_to_ann_tensor_.resize(tflite_index + 1);
+ lite_tensor_to_ann_tensor_.resize(tflite_index + 1, -1);
}
int new_tensor_index = next_ann_tensor_index_++;
lite_tensor_to_ann_tensor_[tflite_index] = new_tensor_index;
@@ -98,14 +125,22 @@ class NNAPIOpBuilder {
operand_mapping_(tensor_mapping),
nn_model_(nn_model) {}
- TfLiteStatus AddScalarInt32Operand(int value) {
- ANeuralNetworksOperandType operand_type{.type = ANEURALNETWORKS_INT32};
- CHECK_NN(context_,
- ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
- int ann_operand = operand_mapping_->add_new_non_tensor_operand();
- CHECK_NN(context_, ANeuralNetworksModel_setOperandValue(
- nn_model_, ann_operand, &value, sizeof(int32_t)));
- augmented_inputs_.push_back(ann_operand);
+ TfLiteStatus AddScalarInt32Operand(int32_t value) {
+ return AddScalarOperand<int32_t>(value, ANEURALNETWORKS_INT32);
+ }
+
+ TfLiteStatus AddScalarFloat32Operand(float value) {
+ return AddScalarOperand<float>(value, ANEURALNETWORKS_FLOAT32);
+ }
+
+ TfLiteStatus AddPoolingParams(void* data) {
+ auto builtin = reinterpret_cast<TfLitePoolParams*>(data);
+ AddScalarInt32Operand(builtin->padding);
+ AddScalarInt32Operand(builtin->stride_width);
+ AddScalarInt32Operand(builtin->stride_height);
+ AddScalarInt32Operand(builtin->filter_width);
+ AddScalarInt32Operand(builtin->filter_height);
+ AddScalarInt32Operand(builtin->activation);
return kTfLiteOk;
}
@@ -149,7 +184,6 @@ class NNAPIOpBuilder {
return kTfLiteOk;
case kTfLiteFloat32:
nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
- scale = 0.f;
break;
case kTfLiteUInt8:
nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
@@ -158,8 +192,8 @@ class NNAPIOpBuilder {
break;
case kTfLiteInt32:
nn_type = ANEURALNETWORKS_TENSOR_INT32;
- scale = 0.f;
- zeroPoint = 0;
+ scale = tensor->params.scale;
+ zeroPoint = tensor->params.zero_point;
break;
default:
context_->ReportError(context_, "Logic error in NN API Delegate.\n");
@@ -192,12 +226,24 @@ class NNAPIOpBuilder {
augmented_inputs_.data(),
static_cast<uint32_t>(augmented_outputs_.size()),
augmented_outputs_.data()));
- augmented_outputs_.clear();
+ augmented_inputs_.clear();
augmented_outputs_.clear();
return kTfLiteOk;
}
private:
+ template <typename T>
+ TfLiteStatus AddScalarOperand(T value, int32_t nn_type) {
+ ANeuralNetworksOperandType operand_type{.type = nn_type};
+ CHECK_NN(context_,
+ ANeuralNetworksModel_addOperand(nn_model_, &operand_type));
+ int ann_operand = operand_mapping_->add_new_non_tensor_operand();
+ CHECK_NN(context_, ANeuralNetworksModel_setOperandValue(
+ nn_model_, ann_operand, &value, sizeof(T)));
+ augmented_inputs_.push_back(ann_operand);
+ return kTfLiteOk;
+ }
+
// TfLiteContext for error handling. Must be named context for macros to
// work.
TfLiteContext* context_;
@@ -227,29 +273,143 @@ class NNAPIDelegateKernel {
// Return a function that knows how to translate a node into its operands
// when called. You can use this function to see if a node is supported
// (i.e. that MappingFn is not nullptr).
- MappingFn Map(TfLiteContext* context, int builtin_code, TfLiteNode* node) {
+ MappingFn Map(TfLiteContext* context, int builtin_code, int version,
+ TfLiteNode* node) {
switch (builtin_code) {
case kTfLiteBuiltinAdd:
- return [](TfLiteContext* context, NNAPIOpBuilder* builder,
- TfLiteNode* node) -> ANeuralNetworksOperationType {
- auto builtin = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
- builder->AddScalarInt32Operand(builtin->activation);
- return ANEURALNETWORKS_ADD;
- };
+ if (version == 1) {
+ return [](TfLiteContext* context, NNAPIOpBuilder* builder,
+ TfLiteNode* node) -> ANeuralNetworksOperationType {
+ auto builtin =
+ reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
+ builder->AddScalarInt32Operand(builtin->activation);
+ return ANEURALNETWORKS_ADD;
+ };
+ } else {
+ return nullptr;
+ }
+ break;
+ case kTfLiteBuiltinMul:
+ if (version == 1) {
+ return [](TfLiteContext* context, NNAPIOpBuilder* builder,
+ TfLiteNode* node) -> ANeuralNetworksOperationType {
+ auto builtin =
+ reinterpret_cast<TfLiteMulParams*>(node->builtin_data);
+ builder->AddScalarInt32Operand(builtin->activation);
+ return ANEURALNETWORKS_MUL;
+ };
+ } else {
+ return nullptr;
+ }
break;
case kTfLiteBuiltinAveragePool2d:
- return [](TfLiteContext* context, NNAPIOpBuilder* builder,
- TfLiteNode* node) -> ANeuralNetworksOperationType {
+ if (version == 1) {
+ return [](TfLiteContext* context, NNAPIOpBuilder* builder,
+ TfLiteNode* node) -> ANeuralNetworksOperationType {
+ builder->AddPoolingParams(node->builtin_data);
+ return ANEURALNETWORKS_AVERAGE_POOL_2D;
+ };
+ } else {
+ return nullptr;
+ }
+ break;
+ case kTfLiteBuiltinMaxPool2d:
+ if (version == 1) {
+ return [](TfLiteContext* context, NNAPIOpBuilder* builder,
+ TfLiteNode* node) -> ANeuralNetworksOperationType {
+ builder->AddPoolingParams(node->builtin_data);
+ return ANEURALNETWORKS_MAX_POOL_2D;
+ };
+ } else {
+ return nullptr;
+ }
+ break;
+ case kTfLiteBuiltinL2Pool2d:
+ if (version == 1) {
+ return [](TfLiteContext* context, NNAPIOpBuilder* builder,
+ TfLiteNode* node) -> ANeuralNetworksOperationType {
+ builder->AddPoolingParams(node->builtin_data);
+ return ANEURALNETWORKS_L2_POOL_2D;
+ };
+ } else {
+ return nullptr;
+ }
+ break;
+ case kTfLiteBuiltinConv2d:
+ if (version == 1) {
auto builtin =
- reinterpret_cast<TfLitePoolParams*>(node->builtin_data);
- builder->AddScalarInt32Operand(builtin->padding);
- builder->AddScalarInt32Operand(builtin->stride_width);
- builder->AddScalarInt32Operand(builtin->stride_height);
- builder->AddScalarInt32Operand(builtin->filter_width);
- builder->AddScalarInt32Operand(builtin->filter_height);
- builder->AddScalarInt32Operand(builtin->activation);
- return ANEURALNETWORKS_AVERAGE_POOL_2D;
- };
+ reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
+ if (builtin->dilation_width_factor != 1 ||
+ builtin->dilation_height_factor != 1 || node->inputs->size != 3) {
+ // NNAPI does not support dilated Conv2D; also require an explicit bias input.
+ return nullptr;
+ }
+ return [](TfLiteContext* context, NNAPIOpBuilder* builder,
+ TfLiteNode* node) -> ANeuralNetworksOperationType {
+ auto builtin =
+ reinterpret_cast<TfLiteConvParams*>(node->builtin_data);
+ builder->AddScalarInt32Operand(builtin->padding);
+ builder->AddScalarInt32Operand(builtin->stride_width);
+ builder->AddScalarInt32Operand(builtin->stride_height);
+ builder->AddScalarInt32Operand(builtin->activation);
+ return ANEURALNETWORKS_CONV_2D;
+ };
+ } else {
+ return nullptr;
+ }
+ break;
+ case kTfLiteBuiltinDepthwiseConv2d:
+ if (version == 1) {
+ return [](TfLiteContext* context, NNAPIOpBuilder* builder,
+ TfLiteNode* node) -> ANeuralNetworksOperationType {
+ auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(
+ node->builtin_data);
+ builder->AddScalarInt32Operand(builtin->padding);
+ builder->AddScalarInt32Operand(builtin->stride_width);
+ builder->AddScalarInt32Operand(builtin->stride_height);
+ builder->AddScalarInt32Operand(builtin->depth_multiplier);
+ builder->AddScalarInt32Operand(builtin->activation);
+ return ANEURALNETWORKS_DEPTHWISE_CONV_2D;
+ };
+ } else {
+ return nullptr;
+ }
+ break;
+ case kTfLiteBuiltinFullyConnected:
+ if (version == 1) {
+ return [](TfLiteContext* context, NNAPIOpBuilder* builder,
+ TfLiteNode* node) -> ANeuralNetworksOperationType {
+ auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(
+ node->builtin_data);
+ builder->AddScalarInt32Operand(builtin->activation);
+ return ANEURALNETWORKS_FULLY_CONNECTED;
+ };
+ } else {
+ return nullptr;
+ }
+ break;
+ case kTfLiteBuiltinSoftmax:
+ if (version == 1) {
+ return [](TfLiteContext* context, NNAPIOpBuilder* builder,
+ TfLiteNode* node) -> ANeuralNetworksOperationType {
+ auto builtin =
+ reinterpret_cast<TfLiteSoftmaxParams*>(node->builtin_data);
+ builder->AddScalarFloat32Operand(builtin->beta);
+ return ANEURALNETWORKS_SOFTMAX;
+ };
+ } else {
+ return nullptr;
+ }
+ break;
+ case kTfLiteBuiltinReshape:
+ if (version == 1) {
+ return [](TfLiteContext* context, NNAPIOpBuilder* builder,
+ TfLiteNode* node) -> ANeuralNetworksOperationType {
+ return ANEURALNETWORKS_RESHAPE;
+ };
+ } else {
+ return nullptr;
+ }
break;
default:
return nullptr;
@@ -292,10 +452,14 @@ class NNAPIDelegateKernel {
int relative_input_index = 0;
for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
TfLiteTensor* tensor = &context->tensors[absolute_input_index];
- CHECK_NN(context, ANeuralNetworksExecution_setInput(
- execution, relative_input_index, nullptr,
- tensor->data.raw, tensor->bytes));
- relative_input_index++;
+ // TODO(miaowang): make sure the delegation works with dequantized weights
+ // as intermediate tensors.
+ if (tensor->allocation_type != kTfLiteMmapRo) {
+ CHECK_NN(context, ANeuralNetworksExecution_setInput(
+ execution, relative_input_index, nullptr,
+ tensor->data.raw, tensor->bytes));
+ relative_input_index++;
+ }
}
// Set the output tensor buffers.
@@ -345,8 +509,8 @@ class NNAPIDelegateKernel {
TF_LITE_ENSURE_STATUS(builder.AddTensorInput(input_index));
}
// Get op type and operands
- int nn_op_type =
- Map(context, reg->builtin_code, node)(context, &builder, node);
+ int nn_op_type = Map(context, reg->builtin_code, reg->version, node)(
+ context, &builder, node);
// Map outputs to NN API tensor indices.
for (auto output_index : TfLiteIntArrayView(node->outputs)) {
TF_LITE_ENSURE_STATUS(builder.AddTensorOutput(output_index));
@@ -368,8 +532,12 @@ class NNAPIDelegateKernel {
std::vector<uint32_t> outputs;
outputs.reserve(output_tensors->size);
// Make the TensorFlow lite inputs and outputs to ann_indices.
- for (int i : TfLiteIntArrayView(input_tensors))
- inputs.push_back(operand_mapping_.lite_index_to_ann(i));
+ for (int i : TfLiteIntArrayView(input_tensors)) {
+ // Constant tensors are not NNAPI inputs.
+ if (context->tensors[i].allocation_type != kTfLiteMmapRo) {
+ inputs.push_back(operand_mapping_.lite_index_to_ann(i));
+ }
+ }
for (int i : TfLiteIntArrayView(output_tensors))
outputs.push_back(operand_mapping_.lite_index_to_ann(i));
// Tell ANN to declare inputs/outputs
@@ -392,7 +560,8 @@ TfLiteDelegate* NnApiDelegate() {
.Prepare = [](TfLiteContext* context,
TfLiteDelegate* delegate) -> TfLiteStatus {
// Do not check nodes_ if NN API is unavailable.
- if (!NNAPIExists()) return kTfLiteOk;
+ // NN API is only available since Android O-MR1 (API 27).
+ if (kAndroidSdkVersion < 27 || !NNAPIExists()) return kTfLiteOk;
std::vector<int> supported_nodes(1);
// We don't care about all nodes_, we only care about ones in the
@@ -400,6 +569,7 @@ TfLiteDelegate* NnApiDelegate() {
TfLiteIntArray* plan;
TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
int total_supported_nodes = 0;
+
// Check for every node if it is supported
// TODO(b/80625235): Fix this to do more careful checking of versioning.
for (int node_index : TfLiteIntArrayView(plan)) {
@@ -408,7 +578,8 @@ TfLiteDelegate* NnApiDelegate() {
TF_LITE_ENSURE_STATUS(context->GetNodeAndRegistration(
context, node_index, &node, &registration));
NNAPIDelegateKernel dummy_kernel;
- if (dummy_kernel.Map(context, registration->builtin_code, node)) {
+ if (dummy_kernel.Map(context, registration->builtin_code,
+ registration->version, node)) {
supported_nodes.push_back(node_index);
}
total_supported_nodes += 1;
diff --git a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc
index ff2e721423..799e3efe0b 100644
--- a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc
+++ b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate_test.cc
@@ -21,8 +21,12 @@ limitations under the License.
namespace tflite {
namespace {
+using ::testing::ElementsAre;
using ::testing::ElementsAreArray;
+// TODO(b/110368244): figure out how to share the existing tests in kernels/ but
+// with the delegation on. Also, add more unit tests to improve code coverage.
+
class FloatAddOpModel : public SingleOpModel {
public:
FloatAddOpModel(const TensorData& input1, const TensorData& input2,
@@ -72,6 +76,535 @@ TEST(NNAPIDelegate, AddWithRelu) {
EXPECT_THAT(m.GetOutput(), ElementsAreArray({0.0, 0.4, 1.0, 1.3}));
}
+class FloatMulOpModel : public SingleOpModel {
+ public:
+ FloatMulOpModel(const TensorData& input1, const TensorData& input2,
+ const TensorData& output,
+ ActivationFunctionType activation_type) {
+ this->SetApplyDelegate([](Interpreter* interpreter) {
+ interpreter->ModifyGraphWithDelegate(NnApiDelegate());
+ });
+ input1_ = AddInput(input1);
+ input2_ = AddInput(input2);
+ output_ = AddOutput(output);
+ SetBuiltinOp(BuiltinOperator_MUL, BuiltinOptions_MulOptions,
+ CreateMulOptions(builder_, activation_type).Union());
+ BuildInterpreter({GetShape(input1_), GetShape(input2_)});
+ }
+
+ int input1() { return input1_; }
+ int input2() { return input2_; }
+
+ std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+
+ protected:
+ int input1_;
+ int input2_;
+ int output_;
+};
+
+TEST(NNAPIDelegate, MulWithNoActivation) {
+ FloatMulOpModel m({TensorType_FLOAT32, {1, 2, 2, 1}},
+ {TensorType_FLOAT32, {1, 2, 2, 1}},
+ {TensorType_FLOAT32, {}}, ActivationFunctionType_NONE);
+ m.PopulateTensor<float>(m.input1(), {-2.0, 0.2, 0.7, 0.8});
+ m.PopulateTensor<float>(m.input2(), {0.1, 0.2, 0.3, 0.5});
+ m.Invoke();
+ EXPECT_THAT(m.GetOutput(),
+ ElementsAreArray(ArrayFloatNear({-0.2, 0.04, 0.21, 0.4})));
+}
+
+class FloatPoolingOpModel : public SingleOpModel {
+ public:
+ FloatPoolingOpModel(BuiltinOperator type, const TensorData& input,
+ int filter_width, int filter_height,
+ const TensorData& output) {
+ this->SetApplyDelegate([](Interpreter* interpreter) {
+ interpreter->ModifyGraphWithDelegate(NnApiDelegate());
+ });
+
+ input_ = AddInput(input);
+ output_ = AddOutput(output);
+
+ SetBuiltinOp(
+ type, BuiltinOptions_Pool2DOptions,
+ CreatePool2DOptions(builder_, Padding_VALID, 2, 2, filter_width,
+ filter_height, ActivationFunctionType_NONE)
+ .Union());
+
+ BuildInterpreter({GetShape(input_)});
+ }
+
+ void SetInput(std::initializer_list<float> data) {
+ PopulateTensor(input_, data);
+ }
+
+ std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+
+ protected:
+ int input_;
+ int output_;
+};
+
+TEST(NNAPIDelegate, AveragePoolWithNoActivation) {
+ FloatPoolingOpModel m(BuiltinOperator_AVERAGE_POOL_2D,
+ /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}},
+ /*filter_width=*/2, /*filter_height=*/2,
+ /*output=*/{TensorType_FLOAT32, {}});
+ m.SetInput({
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ });
+ m.Invoke();
+ EXPECT_THAT(m.GetOutput(), ElementsAreArray({2.75, 5.75}));
+}
+
+TEST(NNAPIDelegate, MaxPoolWithNoActivation) {
+ FloatPoolingOpModel m(BuiltinOperator_MAX_POOL_2D,
+ /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}},
+ /*filter_width=*/2, /*filter_height=*/2,
+ /*output=*/{TensorType_FLOAT32, {}});
+ m.SetInput({
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ });
+ m.Invoke();
+ EXPECT_THAT(m.GetOutput(), ElementsAreArray({6, 10}));
+}
+
+TEST(NNAPIDelegate, L2PoolWithNoActivation) {
+ FloatPoolingOpModel m(BuiltinOperator_L2_POOL_2D,
+ /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}},
+ /*filter_width=*/2, /*filter_height=*/2,
+ /*output=*/{TensorType_FLOAT32, {}});
+ m.SetInput({
+ 0, 6, 2, 4, //
+ 3, 2, 10, 7, //
+ });
+ m.Invoke();
+ EXPECT_THAT(m.GetOutput(), ElementsAreArray({3.5, 6.5}));
+}
+
+class BaseConvolutionOpModel : public SingleOpModel {
+ public:
+ BaseConvolutionOpModel(
+ const TensorData& input, const TensorData& filter,
+ const TensorData& output, int stride_width = 2, int stride_height = 2,
+ enum Padding padding = Padding_VALID,
+ enum ActivationFunctionType activation = ActivationFunctionType_NONE,
+ int dilation_width_factor = 1, int dilation_height_factor = 1) {
+ this->SetApplyDelegate([](Interpreter* interpreter) {
+ interpreter->ModifyGraphWithDelegate(NnApiDelegate());
+ });
+
+ input_ = AddInput(input);
+ filter_ = AddInput(filter);
+
+ int bias_size = GetShape(filter_)[0];
+ if (input.type == TensorType_FLOAT32) {
+ bias_ = AddInput({TensorType_FLOAT32, {bias_size}});
+ } else {
+ // This is a quantized version. The scale of 'bias' depends on the scales
+ // of input and filter. Supposedly this is correctly set during quantized
+ // training.
+ auto bias_scale = GetScale(input_) * GetScale(filter_);
+ TensorData bias{TensorType_INT32, {bias_size}, 0, 0, bias_scale};
+ bias_ = AddInput(bias);
+ }
+
+ output_ = AddOutput(output);
+ if (input.type != TensorType_FLOAT32) {
+ // The following is required by quantized inference. It is the unittest's
+ // responsibility to make sure the output scale falls into the correct
+ // range.
+ CHECK_LT(GetScale(input_) * GetScale(filter_), GetScale(output_));
+ }
+
+ SetBuiltinOp(BuiltinOperator_CONV_2D, BuiltinOptions_Conv2DOptions,
+ CreateConv2DOptions(
+ builder_, padding, stride_width, stride_height, activation,
+ dilation_width_factor, dilation_height_factor)
+ .Union());
+
+ BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)});
+ }
+
+ protected:
+ int input_;
+ int filter_;
+ int bias_;
+ int output_;
+};
+
+class ConvolutionOpModel : public BaseConvolutionOpModel {
+ public:
+ using BaseConvolutionOpModel::BaseConvolutionOpModel;
+
+ void SetFilter(std::initializer_list<float> f) { PopulateTensor(filter_, f); }
+
+ void SetBias(std::initializer_list<float> f) { PopulateTensor(bias_, f); }
+
+ void SetInput(std::initializer_list<float> data) {
+ PopulateTensor(input_, data);
+ }
+ std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+};
+
+class QuantizedConvolutionOpModel : public BaseConvolutionOpModel {
+ public:
+ using BaseConvolutionOpModel::BaseConvolutionOpModel;
+
+ void SetInput(std::initializer_list<float> data) {
+ QuantizeAndPopulate<uint8_t>(input_, data);
+ }
+
+ void SetFilter(std::initializer_list<float> data) {
+ QuantizeAndPopulate<uint8_t>(filter_, data);
+ }
+
+ void SetBias(std::initializer_list<float> data) {
+ QuantizeAndPopulate<int32_t>(bias_, data);
+ }
+
+ std::vector<uint8_t> GetOutput() { return ExtractVector<uint8_t>(output_); }
+ std::vector<float> GetDequantizedOutput() {
+ return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
+ GetScale(output_), GetZeroPoint(output_));
+ }
+};
+
+// In this test we set the input and output scales so that the results
+// match exactly the 'non-quantized' version.
+TEST(NNAPIDelegate, SimpleTestQuantized) {
+ QuantizedConvolutionOpModel m({TensorType_UINT8, {2, 2, 4, 1}, -63.5, 64},
+ {TensorType_UINT8, {3, 2, 2, 1}, -63.5, 64},
+ {TensorType_UINT8, {}, -127, 128});
+ m.SetInput({
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
+ });
+ m.SetFilter({
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
+ });
+ m.SetBias({1, 2, 3});
+
+ m.Invoke();
+
+ EXPECT_THAT(m.GetDequantizedOutput(),
+ ElementsAreArray(ArrayFloatNear(
+ {
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
+ },
+ 1e-5)));
+ // For good measure, let's also verify the quantized values:
+ EXPECT_THAT(m.GetOutput(), ElementsAreArray({
+ 145, 129, 132, //
+ 145, 129, 132, //
+ 144, 131, 130, //
+ 164, 131, 130, //
+ }));
+}
+
+TEST(NNAPIDelegate, Conv2DWithNoActivation) {
+ ConvolutionOpModel m({TensorType_FLOAT32, {2, 2, 4, 1}},
+ {TensorType_FLOAT32, {3, 2, 2, 1}},
+ {TensorType_FLOAT32, {}});
+
+ m.SetInput({
+ // First batch
+ 1, 1, 1, 1, // row = 1
+ 2, 2, 2, 2, // row = 2
+ // Second batch
+ 1, 2, 3, 4, // row = 1
+ 1, 2, 3, 4, // row = 2
+ });
+ m.SetFilter({
+ 1, 2, 3, 4, // first 2x2 filter
+ -1, 1, -1, 1, // second 2x2 filter
+ -1, -1, 1, 1, // third 2x2 filter
+ });
+ m.SetBias({1, 2, 3});
+
+ m.Invoke();
+
+ EXPECT_THAT(m.GetOutput(), ElementsAreArray({
+ 18, 2, 5, // first batch, left
+ 18, 2, 5, // first batch, right
+ 17, 4, 3, // second batch, left
+ 37, 4, 3, // second batch, right
+ }));
+}
+
+class DepthwiseConvolutionOpModel : public SingleOpModel {
+ public:
+ DepthwiseConvolutionOpModel(const TensorData& input, const TensorData& filter,
+ const TensorData& output) {
+ this->SetApplyDelegate([](Interpreter* interpreter) {
+ interpreter->ModifyGraphWithDelegate(NnApiDelegate());
+ });
+
+ input_ = AddInput(input);
+ filter_ = AddInput(filter);
+
+ int bias_size = GetShape(filter_)[3];
+ if (input.type == TensorType_FLOAT32) {
+ bias_ = AddInput({TensorType_FLOAT32, {bias_size}});
+ } else {
+ // This is a quantized version. The scale of 'bias' depends on the scales
+ // of input and filter. Supposedly this is correctly set during quantized
+ // training.
+ auto bias_scale = GetScale(input_) * GetScale(filter_);
+ TensorData bias{TensorType_INT32, {bias_size}, 0, 0, bias_scale};
+ bias_ = AddInput(bias);
+ }
+
+ output_ = AddOutput(output);
+
+ int input_depth = GetShape(input_)[3];
+ int output_depth = GetShape(filter_)[3];
+ int depth_mul = output_depth / input_depth;
+
+ SetBuiltinOp(
+ BuiltinOperator_DEPTHWISE_CONV_2D,
+ BuiltinOptions_DepthwiseConv2DOptions,
+ CreateDepthwiseConv2DOptions(builder_, Padding_VALID, 1, 1, depth_mul,
+ ActivationFunctionType_NONE)
+ .Union());
+
+ BuildInterpreter({GetShape(input_), GetShape(filter_), GetShape(bias_)});
+ }
+
+ void SetFilter(std::initializer_list<float> f) { PopulateTensor(filter_, f); }
+
+ void SetBias(std::initializer_list<float> f) { PopulateTensor(bias_, f); }
+
+ void SetInput(std::initializer_list<float> data) {
+ PopulateTensor(input_, data);
+ }
+
+ std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+
+ protected:
+ int input_;
+ int filter_;
+ int bias_;
+ int output_;
+};
+
+TEST(NNAPIDelegate, DepthwiseConv2DWithNoActivation) {
+ DepthwiseConvolutionOpModel m({TensorType_FLOAT32, {1, 3, 2, 2}},
+ {TensorType_FLOAT32, {1, 2, 2, 4}},
+ {TensorType_FLOAT32, {}});
+
+ m.SetInput({
+ 1, 2, 7, 8, // column 1
+ 3, 4, 9, 10, // column 2
+ 5, 6, 11, 12, // column 3
+ });
+ m.SetFilter({
+ 1, 2, 3, 4, //
+ -9, 10, -11, 12, //
+ 5, 6, 7, 8, //
+ 13, -14, 15, -16, //
+ });
+ m.SetBias({1, 2, 3, 4});
+
+ m.Invoke();
+
+ EXPECT_THAT(m.GetOutput(), ElementsAreArray({
+ 71, -34, 99, -20, //
+ 91, -26, 127, -4, //
+ }));
+}
+
+class FloatFullyConnectedOpModel : public SingleOpModel {
+ public:
+ FloatFullyConnectedOpModel(int units, int batches, const TensorData& input,
+ const TensorData& output = {TensorType_FLOAT32})
+ : batches_(batches), units_(units) {
+ this->SetApplyDelegate([](Interpreter* interpreter) {
+ interpreter->ModifyGraphWithDelegate(NnApiDelegate());
+ });
+
+ int total_input_size = 1;
+ for (int i = 0; i < input.shape.size(); ++i) {
+ total_input_size *= input.shape[i];
+ }
+ input_size_ = total_input_size / batches_;
+
+ input_ = AddInput(input);
+ weights_ =
+ AddInput({input.type, {units_, input_size_}, input.min, input.max});
+
+ if (input.type == TensorType_FLOAT32) {
+ bias_ = AddInput({TensorType_FLOAT32, {units_}});
+ } else {
+ // This is a quantized version. The scale of 'bias' depends on the scales
+ // of input and filter. Supposedly this is correctly set during quantized
+ // training.
+ auto bias_scale = GetScale(input_) * GetScale(weights_);
+ TensorData bias{TensorType_INT32, {units_}, 0, 0, bias_scale};
+ bias_ = AddInput(bias);
+ }
+
+ output_ = AddOutput(output);
+
+ SetBuiltinOp(
+ BuiltinOperator_FULLY_CONNECTED, BuiltinOptions_FullyConnectedOptions,
+ CreateFullyConnectedOptions(builder_, ActivationFunctionType_RELU)
+ .Union());
+ BuildInterpreter({GetShape(input_), GetShape(weights_), GetShape(bias_)});
+ }
+
+ int input_size() { return input_size_; }
+ int num_units() { return units_; }
+ int num_batches() { return batches_; }
+
+ void SetBias(std::initializer_list<float> f) { PopulateTensor(bias_, f); }
+
+ void SetWeights(std::initializer_list<float> f) {
+ PopulateTensor(weights_, f);
+ }
+
+ void SetInput(std::initializer_list<float> data) {
+ PopulateTensor(input_, data);
+ }
+ void SetInput(int offset, float* begin, float* end) {
+ PopulateTensor(input_, offset, begin, end);
+ }
+
+ std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+
+ protected:
+ int input_;
+ int weights_;
+ int bias_;
+ int output_;
+
+ int batches_;
+ int units_;
+ int input_size_;
+};
+
+TEST(NNAPIDelegate, FullyConnectedSimpleTest) {
+ FloatFullyConnectedOpModel m(/*units=*/3, /*batches=*/2,
+ /*input=*/{TensorType_FLOAT32, {2, 10}});
+ m.SetWeights({
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 0
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 1
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, // u = 2
+ });
+ m.SetBias({1, 2, 3});
+
+ m.SetInput({
+ 1, 2, 3, 4, 5, 6, 7, 8, -9, -10, // b = 0
+ 1, 2, 3, 4, 5, 6, 7, -8, 9, -10, // b = 1
+ });
+
+ m.Invoke();
+
+ EXPECT_THAT(m.GetOutput(), ElementsAre(24, 25, 26, 58, 59, 60));
+}
+
+class SoftmaxOpModel : public SingleOpModel {
+ public:
+ SoftmaxOpModel(int batches, int size, float beta)
+ : batches_(batches), input_size_(size), beta_(beta) {
+ this->SetApplyDelegate([](Interpreter* interpreter) {
+ interpreter->ModifyGraphWithDelegate(NnApiDelegate());
+ });
+
+ input_ = AddInput(TensorType_FLOAT32);
+ output_ = AddOutput(TensorType_FLOAT32);
+ SetBuiltinOp(BuiltinOperator_SOFTMAX, BuiltinOptions_SoftmaxOptions,
+ CreateSoftmaxOptions(builder_, beta_).Union());
+ BuildInterpreter({{batches_, input_size_}});
+ }
+
+ void SetInput(std::initializer_list<float> data) {
+ PopulateTensor(input_, data);
+ }
+
+ void SetInput(int offset, float* begin, float* end) {
+ PopulateTensor(input_, offset, begin, end);
+ }
+
+ std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+
+ private:
+ int input_;
+ int output_;
+
+ int batches_;
+ int input_size_;
+ float beta_;
+};
+
+TEST(NNAPIDelegate, SoftmaxSimpleTest) {
+ SoftmaxOpModel m(/*batches=*/2, /*size=*/5, /*beta=*/1.0);
+ m.SetInput({
+ 1.0, 2.0, 3.0, 4.0, 5.0, // b = 0
+ -1.0, -2.0, -3.0, -4.0, -5.0, // b = 1
+ });
+
+ m.Invoke();
+
+ EXPECT_THAT(
+ m.GetOutput(),
+ ElementsAreArray(ArrayFloatNear(
+ {0.011656231, 0.031684921, 0.086128544, 0.234121657, 0.636408647,
+ 0.636408647, 0.234121657, 0.086128544, 0.031684921, 0.011656231},
+ 1e-6)));
+}
+
+class ReshapeOpModel : public SingleOpModel {
+ public:
+ ReshapeOpModel(std::initializer_list<int> input_shape,
+ std::initializer_list<int> new_shape) {
+ this->SetApplyDelegate([](Interpreter* interpreter) {
+ interpreter->ModifyGraphWithDelegate(NnApiDelegate());
+ });
+
+ input_ = AddInput(TensorType_FLOAT32);
+ new_shape_ = AddInput(TensorType_INT32);
+ output_ = AddOutput(TensorType_FLOAT32);
+ SetBuiltinOp(
+ BuiltinOperator_RESHAPE, BuiltinOptions_ReshapeOptions,
+ CreateReshapeOptions(builder_, builder_.CreateVector<int>(new_shape))
+ .Union());
+ BuildInterpreter({input_shape, {static_cast<int>(new_shape.size())}});
+ PopulateTensor<int>(new_shape_, new_shape);
+ }
+
+ void SetInput(std::initializer_list<float> data) {
+ PopulateTensor<float>(input_, data);
+ }
+ std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
+ std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
+
+ private:
+ int input_;
+ int new_shape_;
+ int output_;
+};
+
+TEST(NNAPIDelegate, ReshapeSimpleTest) {
+ ReshapeOpModel m({1, 2, 4, 1}, {2, 2, 2});
+ m.SetInput({1, 2, 3, 4, 5, 6, 7, 8});
+ m.Invoke();
+ EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 2, 3, 4, 5, 6, 7, 8}));
+ EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2, 2, 2}));
+}
+
} // namespace
} // namespace tflite