Diffstat (limited to 'tensorflow/contrib/lite/nnapi_delegate.cc')
 tensorflow/contrib/lite/nnapi_delegate.cc | 276 ++++++++++++++++++++--------
 1 file changed, 200 insertions(+), 76 deletions(-)
diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc
index 7627d89c09..551e8ed320 100644
--- a/tensorflow/contrib/lite/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/nnapi_delegate.cc
@@ -29,27 +29,46 @@ limitations under the License.
namespace tflite {
-// TODO(aselle): FATAL leaves resources hanging.
-void FATAL(const char* format, ...) {
+void logError(const char* format, ...) {
+ // TODO(mikie): use android logging, stderr is not captured for Java
+ // applications
va_list args;
va_start(args, format);
vfprintf(stderr, format, args);
va_end(args);
+ fprintf(stderr, "\n");
fflush(stderr);
- exit(1);
}
+#define FATAL(...)         \
+  do {                     \
+    logError(__VA_ARGS__); \
+    exit(1);               \
+  } while (0)
+
// TODO(aselle): Change the error model to use status codes.
-#define CHECK_TFLITE_SUCCESS(x) \
- if (x != kTfLiteOk) { \
- FATAL("Aborting since tflite returned failure."); \
+#define CHECK_TFLITE_SUCCESS(x) \
+ if (x != kTfLiteOk) { \
+ FATAL("Aborting since tflite returned failure nnapi_delegate.cc:%d.", \
+ __LINE__); \
}
-#define CHECK_NN(x) \
- if (x != ANEURALNETWORKS_NO_ERROR) { \
- FATAL("Aborting since tflite returned failure."); \
+#define CHECK_NN(x) \
+ if (x != ANEURALNETWORKS_NO_ERROR) { \
+ FATAL("Aborting since NNAPI returned failure nnapi_delegate.cc:%d", \
+ __LINE__); \
}
+#define RETURN_ERROR_IF_NN_FAILED(x) \
+ if (x != ANEURALNETWORKS_NO_ERROR) { \
+ logError( \
+ "Returning error since NNAPI returned failure nnapi_delegate.cc:%d.", \
+ __LINE__); \
+ return kTfLiteError; \
+ }
+
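+// Usage sketch for the three error paths above (illustrative only;
+// AddOneOperand is a hypothetical helper, not part of this file):
+// CHECK_TFLITE_SUCCESS and CHECK_NN abort the process on unrecoverable
+// states, while RETURN_ERROR_IF_NN_FAILED propagates a status so callers
+// such as BuildGraph can fail gracefully and leave a CPU fallback possible.
+//
+//   TfLiteStatus AddOneOperand(ANeuralNetworksModel* model,
+//                              const ANeuralNetworksOperandType* type) {
+//     // Recoverable failure: surfaced to the caller as kTfLiteError.
+//     RETURN_ERROR_IF_NN_FAILED(
+//         ANeuralNetworksModel_addOperand(model, type));
+//     return kTfLiteOk;
+//   }
+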
+// Tracking of NNAPI operand ids
+static const int64_t kOperandIdNotSet = -1;
+static const int64_t kOperandNotNeeded = -2;
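+//
+// Illustrative walk-through of the mapping (values made up): with five
+// TFLite tensors of which tensor 2 is a temporary, tensor_id_to_nnapi_id
+// evolves as
+//   initialized:              {-2, -2, -2, -2, -2}  // all kOperandNotNeeded
+//   after marking in/outputs: {-1, -1, -2, -1, -1}  // kOperandIdNotSet
+//   after addTensorOperands:  { 0,  1, -2,  2,  3}  // dense NNAPI ids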
+
namespace {
int32_t GetAndroidSdkVersion() {
@@ -104,21 +123,16 @@ NNAPIDelegate::~NNAPIDelegate() {
}
// Adds the tensors of the interpreter to the NN API model.
-// Returns the number of operands added.
-uint32_t addTensorOperands(tflite::Interpreter* interpreter,
- ANeuralNetworksModel* nn_model,
- const std::vector<uint32_t>& skip_list) {
+TfLiteStatus addTensorOperands(tflite::Interpreter* interpreter,
+ ANeuralNetworksModel* nn_model,
+ uint32_t* no_of_operands_added,
+ std::vector<int64_t>* nnapi_ids) {
uint32_t next_id = 0;
for (size_t i = 0; i < interpreter->tensors_size(); i++) {
- // skip temporaries tensors.
- bool shouldSkip = false;
- for (auto skip_idx : skip_list) {
- if (i == skip_idx) {
- shouldSkip = true;
- break;
- }
- }
- if (shouldSkip) continue;
+ // Skip temporaries and RNN back-edges.
+ if ((*nnapi_ids)[i] == kOperandNotNeeded) continue;
+
+ (*nnapi_ids)[i] = int64_t(next_id);
int32_t nn_type = 0;
// NNAPI requires 32-bit float scale to be zero, tflite doesn't care
@@ -144,7 +158,18 @@ uint32_t addTensorOperands(tflite::Interpreter* interpreter,
zeroPoint = tensor->params.zero_point;
break;
default:
- FATAL("Unsupported type.");
+ logError("Unsupported tensor type %d", tensor->type);
+ return kTfLiteError;
+ }
+ if (tensor->dims->size == 0) {
+ logError("NNAPI doesn't support tensors with rank 0 (index %d name %s)",
+ i, tensor->name);
+ return kTfLiteError;
+ }
+ if (tensor->dims->size > 4) {
+ logError("NNAPI doesn't support tensors with rank > 4 (index %d name %s)",
+ i, tensor->name);
+ return kTfLiteError;
}
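+    // (NNAPI tensor operands at this API level support only ranks 1-4; a
+    // rank-0 TFLite scalar would have to be recast as a rank-1 tensor of
+    // size 1, which this delegate does not attempt.)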
// TODO(aselle): Note, many of these are intermediate results. Do I need
// to ever specify these sizes. I am currently below doing setValue
@@ -154,36 +179,53 @@ uint32_t addTensorOperands(tflite::Interpreter* interpreter,
ANeuralNetworksOperandType operand_type{
nn_type, static_cast<uint32_t>(tensor->dims->size),
reinterpret_cast<uint32_t*>(tensor->dims->data), scale, zeroPoint};
- CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type));
+ RETURN_ERROR_IF_NN_FAILED(
+ ANeuralNetworksModel_addOperand(nn_model, &operand_type));
// TODO(aselle): Based on Michael's suggestion, limiting this to read
// only memory
if (tensor->allocation_type == kTfLiteMmapRo) {
if (const NNAPIAllocation* alloc = dynamic_cast<const NNAPIAllocation*>(
static_cast<const Allocation*>(tensor->allocation))) {
- CHECK_NN(ANeuralNetworksModel_setOperandValueFromMemory(
- nn_model, next_id, alloc->memory(), alloc->offset(tensor->data.raw),
- tensor->bytes));
+ RETURN_ERROR_IF_NN_FAILED(
+ ANeuralNetworksModel_setOperandValueFromMemory(
+ nn_model, next_id, alloc->memory(),
+ alloc->offset(tensor->data.raw), tensor->bytes));
} else {
- CHECK_NN(ANeuralNetworksModel_setOperandValue(
+ RETURN_ERROR_IF_NN_FAILED(ANeuralNetworksModel_setOperandValue(
nn_model, next_id, tensor->data.raw, tensor->bytes));
}
} else if (tensor->bytes == 0) {
// These size-0 tensors are placeholders for optional tensors.
- CHECK_NN(
+ RETURN_ERROR_IF_NN_FAILED(
ANeuralNetworksModel_setOperandValue(nn_model, next_id, nullptr, 0));
}
++next_id;
}
- return next_id;
+ *no_of_operands_added = next_id;
+ return kTfLiteOk;
+}
+
+void MapAndAddTensorIds(const int* from_ids_buf, size_t from_ids_count,
+ std::vector<uint32_t>* into,
+ const std::vector<int64_t>& map) {
+ for (size_t i = 0; i < from_ids_count; i++) {
+ int from_id = from_ids_buf[i];
+ if (from_id == kOptionalTensor) {
+ into->push_back(from_id);
+ } else {
+ into->push_back(map[from_id]);
+ }
+ }
}
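+
+// Usage sketch (ids illustrative): translating a node's TFLite tensor
+// indices into NNAPI operand ids before registering an operation;
+// kOptionalTensor entries pass through unmapped, matching the convention
+// for omitted optional operands.
+//
+//   std::vector<uint32_t> nn_inputs;
+//   MapAndAddTensorIds(node.inputs->data, node.inputs->size, &nn_inputs,
+//                      tensor_id_to_nnapi_id);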
// Adds the operations and their parameters to the NN API model.
// 'next_id' is the operand ID of the next operand to be added to the model.
-void AddOpsAndParams(tflite::Interpreter* interpreter,
- ANeuralNetworksModel* nn_model, uint32_t next_id,
- std::vector<int>* model_state_inputs,
- std::vector<int>* model_state_outputs) {
+TfLiteStatus AddOpsAndParams(
+ tflite::Interpreter* interpreter, ANeuralNetworksModel* nn_model,
+ uint32_t next_id, std::vector<int>* model_state_inputs,
+ std::vector<int>* model_state_outputs,
+ const std::vector<int64_t>& tensor_id_to_nnapi_id) {
for (size_t i = 0; i < interpreter->nodes_size(); i++) {
const auto* node_and_registration = interpreter->node_and_registration(i);
const TfLiteNode& node = node_and_registration->first;
@@ -192,10 +234,11 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
static_cast<tflite::BuiltinOperator>(registration.builtin_code);
// Add the parameters.
- std::vector<uint32_t> augmented_inputs(
- node.inputs->data, node.inputs->data + node.inputs->size);
- std::vector<uint32_t> augmented_outputs(
- node.outputs->data, node.outputs->data + node.outputs->size);
+ std::vector<uint32_t> augmented_inputs, augmented_outputs;
+ MapAndAddTensorIds(node.inputs->data, node.inputs->size, &augmented_inputs,
+ tensor_id_to_nnapi_id);
+ MapAndAddTensorIds(node.outputs->data, node.outputs->size,
+ &augmented_outputs, tensor_id_to_nnapi_id);
auto add_scalar_int32 = [&nn_model, &augmented_inputs,
&next_id](int value) {
@@ -244,42 +287,54 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
model_state_outputs->push_back(tensor_id);
next_id++;
};
+ auto check_and_add_activation = [&add_scalar_int32](int activation) {
+ if (activation > kTfLiteActRelu6) {
+ FATAL("NNAPI only supports RELU, RELU1 and RELU6 activations");
+ }
+ add_scalar_int32(activation);
+ };
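+    // The pass-through above works because the first four values of
+    // TfLiteFusedActivation (kTfLiteActNone=0, kTfLiteActRelu=1,
+    // kTfLiteActRelu1=2, kTfLiteActRelu6=3) coincide numerically with
+    // NNAPI's FuseCode (ANEURALNETWORKS_FUSED_NONE..FUSED_RELU6); anything
+    // above kTfLiteActRelu6 (e.g. tanh) has no NNAPI fused equivalent.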
auto add_add_params = [&add_scalar_int32](void* data) {
auto* builtin = reinterpret_cast<TfLiteAddParams*>(data);
+ if (builtin->activation > kTfLiteActRelu6) {
+ FATAL("NNAPI only supports RELU, RELU1 and RELU6 activations");
+ }
add_scalar_int32(builtin->activation);
};
- auto add_pooling_params = [&add_scalar_int32](void* data) {
+ auto add_pooling_params = [&add_scalar_int32,
+ &check_and_add_activation](void* data) {
auto builtin = reinterpret_cast<TfLitePoolParams*>(data);
add_scalar_int32(builtin->padding);
add_scalar_int32(builtin->stride_width);
add_scalar_int32(builtin->stride_height);
add_scalar_int32(builtin->filter_width);
add_scalar_int32(builtin->filter_height);
- add_scalar_int32(builtin->activation);
+ check_and_add_activation(builtin->activation);
};
- auto add_convolution_params = [&add_scalar_int32](void* data) {
+ auto add_convolution_params = [&add_scalar_int32,
+ &check_and_add_activation](void* data) {
auto builtin = reinterpret_cast<TfLiteConvParams*>(data);
add_scalar_int32(builtin->padding);
add_scalar_int32(builtin->stride_width);
add_scalar_int32(builtin->stride_height);
- add_scalar_int32(builtin->activation);
+ check_and_add_activation(builtin->activation);
};
- auto add_depthwise_conv_params = [&add_scalar_int32](void* data) {
+ auto add_depthwise_conv_params = [&add_scalar_int32,
+ &check_and_add_activation](void* data) {
auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(data);
add_scalar_int32(builtin->padding);
add_scalar_int32(builtin->stride_width);
add_scalar_int32(builtin->stride_height);
add_scalar_int32(builtin->depth_multiplier);
- add_scalar_int32(builtin->activation);
+ check_and_add_activation(builtin->activation);
};
- auto add_fully_connected_params = [&add_scalar_int32](void* data) {
+ auto add_fully_connected_params = [&check_and_add_activation](void* data) {
auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(data);
- add_scalar_int32(builtin->activation);
+ check_and_add_activation(builtin->activation);
};
auto add_concatenation_params = [&add_scalar_int32](void* data) {
@@ -311,6 +366,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
// LSTM in NNAPI requires a scratch tensor as an output operand.
auto add_lstm_scratch_tensor_float32 = [interpreter, &node, &nn_model,
&next_id, &augmented_outputs]() {
+ if (node.temporaries->size == 0) return;
int scratch_buffer_index = node.temporaries->data[0];
const TfLiteTensor* tensor = interpreter->tensor(scratch_buffer_index);
ANeuralNetworksOperandType operand_type{
@@ -385,7 +441,14 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
add_pooling_params(node.builtin_data);
nn_op_type = ANEURALNETWORKS_L2_POOL_2D;
break;
- case tflite::BuiltinOperator_CONV_2D:
+ case tflite::BuiltinOperator_CONV_2D: {
+ auto builtin = reinterpret_cast<TfLiteConvParams*>(node.builtin_data);
+ if (builtin->dilation_width_factor != 1 ||
+ builtin->dilation_height_factor != 1 || node.inputs->size != 3) {
+ logError("NNAPI does not support dilated Conv2D.");
+ return kTfLiteError;
+ }
+ }
add_convolution_params(node.builtin_data);
nn_op_type = ANEURALNETWORKS_CONV_2D;
break;
@@ -429,6 +492,10 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH;
break;
case tflite::BuiltinOperator_LSTM: {
+ if (node.inputs->size + /* no of params */ 3 != 21) {
+ logError("NNAPI only supports 21-input LSTMs");
+ return kTfLiteError;
+ }
duplicate_state_tensor_float32(
node.outputs->data[/*kOutputStateTensor*/ 0]);
duplicate_state_tensor_float32(
@@ -467,16 +534,40 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
case tflite::BuiltinOperator_DIV:
nnapi_version = 11; // require NNAPI 1.1
nn_op_type = ANEURALNETWORKS_DIV;
+ check_and_add_activation(
+ reinterpret_cast<TfLiteDivParams*>(node.builtin_data)->activation);
break;
case tflite::BuiltinOperator_SUB:
nnapi_version = 11; // require NNAPI 1.1
nn_op_type = ANEURALNETWORKS_SUB;
+ check_and_add_activation(
+ reinterpret_cast<TfLiteSubParams*>(node.builtin_data)->activation);
break;
case tflite::BuiltinOperator_SQUEEZE:
nnapi_version = 11; // requires NNAPI 1.1
add_squeeze_params(node.builtin_data);
nn_op_type = ANEURALNETWORKS_SQUEEZE;
break;
+ case tflite::BuiltinOperator_TRANSPOSE:
+ // The permutation input tensor value dictates the output dimensions.
+ // TODO(b/110888333): Support dynamically-sized tensors in delegates.
+ if ((node.inputs->size > 1) &&
+ (interpreter->tensor(node.inputs->data[1])->allocation_type !=
+ kTfLiteMmapRo)) {
+ logError("NNAPI does not yet support dynamic tensors.");
+ return kTfLiteError;
+ }
+ nnapi_version = 11; // require NNAPI 1.1
+ nn_op_type = ANEURALNETWORKS_TRANSPOSE;
+ break;
+ case tflite::BuiltinOperator_L2_NORMALIZATION:
+ nn_op_type = ANEURALNETWORKS_L2_NORMALIZATION;
+ if (reinterpret_cast<TfLiteL2NormParams*>(node.builtin_data)
+ ->activation != kTfLiteActNone) {
+ FATAL(
+ "NNAPI does not support L2Normalization with fused activations");
+ }
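+        // (ANEURALNETWORKS_L2_NORMALIZATION takes no fused-activation
+        // operand, so a fused activation cannot be expressed in NNAPI.)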
+ break;
case tflite::BuiltinOperator_CONCAT_EMBEDDINGS:
case tflite::BuiltinOperator_LSH_PROJECTION:
case tflite::BuiltinOperator_HASHTABLE_LOOKUP:
@@ -485,7 +576,6 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
case tflite::BuiltinOperator_EMBEDDING_LOOKUP_SPARSE:
case tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM:
case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
- case tflite::BuiltinOperator_L2_NORMALIZATION:
case tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION:
case tflite::BuiltinOperator_PADV2:
case tflite::BuiltinOperator_RESIZE_BILINEAR:
@@ -496,7 +586,6 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
case tflite::BuiltinOperator_SPACE_TO_BATCH_ND:
case tflite::BuiltinOperator_BATCH_TO_SPACE_ND:
case tflite::BuiltinOperator_TOPK_V2:
- case tflite::BuiltinOperator_TRANSPOSE:
case tflite::BuiltinOperator_SPLIT:
case tflite::BuiltinOperator_STRIDED_SLICE:
case tflite::BuiltinOperator_EXP:
@@ -508,6 +597,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
case tflite::BuiltinOperator_MAXIMUM:
case tflite::BuiltinOperator_MINIMUM:
case tflite::BuiltinOperator_ARG_MAX:
+ case tflite::BuiltinOperator_ARG_MIN:
case tflite::BuiltinOperator_GREATER:
case tflite::BuiltinOperator_GREATER_EQUAL:
case tflite::BuiltinOperator_LESS:
@@ -524,16 +614,21 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
case tflite::BuiltinOperator_EQUAL:
case tflite::BuiltinOperator_NOT_EQUAL:
case tflite::BuiltinOperator_SUM:
+ case tflite::BuiltinOperator_REDUCE_MAX:
+ case tflite::BuiltinOperator_REDUCE_PROD:
case tflite::BuiltinOperator_SQRT:
case tflite::BuiltinOperator_RSQRT:
case tflite::BuiltinOperator_SHAPE:
case tflite::BuiltinOperator_POW:
- FATAL("Op code %d is currently not delegated to NNAPI", builtin);
- nn_op_type = -1; // set to invalid
+ case tflite::BuiltinOperator_FAKE_QUANT:
+ case tflite::BuiltinOperator_PACK:
+ case tflite::BuiltinOperator_LOGICAL_OR:
+ logError("Op code %d is currently not delegated to NNAPI", builtin);
+ return kTfLiteError;
break;
case tflite::BuiltinOperator_CUSTOM:
- FATAL("Custom operations are not supported when using NNAPI.");
- nn_op_type = -1; // set to invalid
+ logError("Custom operations are not supported when using NNAPI.");
+ return kTfLiteError;
break;
}
@@ -542,47 +637,70 @@ void AddOpsAndParams(tflite::Interpreter* interpreter,
}
// Add the operation.
- CHECK_NN(ANeuralNetworksModel_addOperation(
+ RETURN_ERROR_IF_NN_FAILED(ANeuralNetworksModel_addOperation(
nn_model, nn_op_type, static_cast<uint32_t>(augmented_inputs.size()),
augmented_inputs.data(),
static_cast<uint32_t>(augmented_outputs.size()),
reinterpret_cast<uint32_t*>(augmented_outputs.data())));
}
+ return kTfLiteOk;
}
TfLiteStatus NNAPIDelegate::BuildGraph(Interpreter* interpreter) {
- // TODO(aselle): This is not correct. need to handle resize invalidation.
- if (nn_model_ && nn_compiled_model_) return kTfLiteOk;
+ if (nn_model_ && nn_compiled_model_) return model_status_;
+ // TODO(aselle): This is not correct. need to handle resize invalidation.
if (!nn_model_) {
CHECK_NN(ANeuralNetworksModel_create(&nn_model_));
- // Find all the temporary tensors and put them in a skip_list.
- std::vector<uint32_t> skip_list;
+ // Find which tensors should be added to NNAPI. TFLite has temporaries
+ // and RNN back-edges which are not valid for NNAPI. We look through all
+ // inputs and outputs and mark the mapping in tensor_id_to_nnapi_id with
+ // kOperandIdNotSet. addTensorOperands will replace those with the
+ // corresponding NNAPI operand ids and skip kOperandNotNeeded entries.
+ std::vector<int64_t> tensor_id_to_nnapi_id(interpreter->tensors_size(),
+ kOperandNotNeeded);
+ auto set_ids_to_not_set = [&tensor_id_to_nnapi_id](const int* buf,
+ size_t count) {
+ for (int j = 0; j < count; j++) {
+ auto tensor_id = buf[j];
+ if (tensor_id != kOptionalTensor) {
+ tensor_id_to_nnapi_id[tensor_id] = kOperandIdNotSet;
+ }
+ }
+ };
for (size_t i = 0; i < interpreter->nodes_size(); i++) {
const auto* node_and_registration = interpreter->node_and_registration(i);
const TfLiteNode& node = node_and_registration->first;
- if (node.temporaries != nullptr) {
- for (int j = 0; j < node.temporaries->size; j++) {
- skip_list.push_back(static_cast<uint32_t>(node.temporaries->data[j]));
- }
- }
+ set_ids_to_not_set(node.inputs->data, node.inputs->size);
+ set_ids_to_not_set(node.outputs->data, node.outputs->size);
}
-
- uint32_t next_id = addTensorOperands(interpreter, nn_model_, skip_list);
- AddOpsAndParams(interpreter, nn_model_, next_id, &model_states_inputs_,
- &model_states_outputs_);
-
- std::vector<int> augmented_inputs = interpreter->inputs();
- std::vector<int> augmented_outputs = interpreter->outputs();
-
- // All state tensors input/output need to be treated as model input/output.
+ set_ids_to_not_set(interpreter->inputs().data(),
+ interpreter->inputs().size());
+ set_ids_to_not_set(interpreter->outputs().data(),
+ interpreter->outputs().size());
+
+ uint32_t next_id = 0;
+ RETURN_ERROR_IF_NN_FAILED(addTensorOperands(
+ interpreter, nn_model_, &next_id, &tensor_id_to_nnapi_id));
+ RETURN_ERROR_IF_NN_FAILED(
+ AddOpsAndParams(interpreter, nn_model_, next_id, &model_states_inputs_,
+ &model_states_outputs_, tensor_id_to_nnapi_id));
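+    // Note: RETURN_ERROR_IF_NN_FAILED accepts these TfLiteStatus results
+    // only because kTfLiteOk and ANEURALNETWORKS_NO_ERROR are both 0; on a
+    // TFLite-side failure the logged message will still mention NNAPI.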
+
+ std::vector<uint32_t> augmented_inputs;
+ MapAndAddTensorIds(interpreter->inputs().data(),
+ interpreter->inputs().size(), &augmented_inputs,
+ tensor_id_to_nnapi_id);
augmented_inputs.insert(augmented_inputs.end(),
model_states_inputs_.begin(),
model_states_inputs_.end());
- augmented_outputs.insert(augmented_outputs.end(),
- model_states_outputs_.begin(),
- model_states_outputs_.end());
+ std::vector<uint32_t> augmented_outputs;
+ MapAndAddTensorIds(interpreter->outputs().data(),
+ interpreter->outputs().size(), &augmented_outputs,
+ tensor_id_to_nnapi_id);
+ MapAndAddTensorIds(model_states_outputs_.data(),
+ model_states_outputs_.size(), &augmented_outputs,
+ tensor_id_to_nnapi_id);
CHECK_NN(ANeuralNetworksModel_identifyInputsAndOutputs(
nn_model_, static_cast<uint32_t>(augmented_inputs.size()),
@@ -600,7 +718,13 @@ TfLiteStatus NNAPIDelegate::BuildGraph(Interpreter* interpreter) {
TfLiteStatus NNAPIDelegate::Invoke(Interpreter* interpreter) {
if (!nn_model_) {
- TF_LITE_ENSURE_STATUS(BuildGraph(interpreter));
+ model_status_ = BuildGraph(interpreter);
+ if (model_status_ != kTfLiteOk) {
+ logError("Failed to build graph for NNAPI");
+ }
+ }
+ if (model_status_ != kTfLiteOk) {
+ return model_status_;
}
ANeuralNetworksExecution* execution = nullptr;