diff options
Diffstat (limited to 'tensorflow/contrib/lite/nnapi_delegate.cc')
-rw-r--r-- | tensorflow/contrib/lite/nnapi_delegate.cc | 276 |
1 file changed, 200 insertions, 76 deletions
diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index 7627d89c09..551e8ed320 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -29,27 +29,46 @@ limitations under the License. namespace tflite { -// TODO(aselle): FATAL leaves resources hanging. -void FATAL(const char* format, ...) { +void logError(const char* format, ...) { + // TODO(mikie): use android logging, stderr is not captured for Java + // applications va_list args; va_start(args, format); vfprintf(stderr, format, args); va_end(args); + fprintf(stderr, "\n"); fflush(stderr); - exit(1); } +#define FATAL(...) \ + logError(__VA_ARGS__); \ + exit(1); + // TODO(aselle): Change the error model to use status codes. -#define CHECK_TFLITE_SUCCESS(x) \ - if (x != kTfLiteOk) { \ - FATAL("Aborting since tflite returned failure."); \ +#define CHECK_TFLITE_SUCCESS(x) \ + if (x != kTfLiteOk) { \ + FATAL("Aborting since tflite returned failure nnapi_delegate.cc:%d.", \ + __LINE__); \ } -#define CHECK_NN(x) \ - if (x != ANEURALNETWORKS_NO_ERROR) { \ - FATAL("Aborting since tflite returned failure."); \ +#define CHECK_NN(x) \ + if (x != ANEURALNETWORKS_NO_ERROR) { \ + FATAL("Aborting since NNAPI returned failure nnapi_delegate.cc:%d", \ + __LINE__); \ } +#define RETURN_ERROR_IF_NN_FAILED(x) \ + if (x != ANEURALNETWORKS_NO_ERROR) { \ + logError( \ + "Returning error since NNAPI returned failure nnapi_delegate.cc:%d.", \ + __LINE__); \ + return kTfLiteError; \ + } + +// Tracking of NNAPI operand ids +static const int64_t kOperandIdNotSet = -1; +static const int64_t kOperandNotNeeded = -2; + namespace { int32_t GetAndroidSdkVersion() { @@ -104,21 +123,16 @@ NNAPIDelegate::~NNAPIDelegate() { } // Adds the tensors of the interpreter to the NN API model. -// Returns the number of operands added. 
-uint32_t addTensorOperands(tflite::Interpreter* interpreter, - ANeuralNetworksModel* nn_model, - const std::vector<uint32_t>& skip_list) { +TfLiteStatus addTensorOperands(tflite::Interpreter* interpreter, + ANeuralNetworksModel* nn_model, + uint32_t* no_of_operands_added, + std::vector<int64_t>* nnapi_ids) { uint32_t next_id = 0; for (size_t i = 0; i < interpreter->tensors_size(); i++) { - // skip temporaries tensors. - bool shouldSkip = false; - for (auto skip_idx : skip_list) { - if (i == skip_idx) { - shouldSkip = true; - break; - } - } - if (shouldSkip) continue; + // Skip temporaries and RNN back-edges. + if ((*nnapi_ids)[i] == kOperandNotNeeded) continue; + + (*nnapi_ids)[i] = int64_t(next_id); int32_t nn_type = 0; // NNAPI requires 32-bit float scale to be zero, tflite doesn't care @@ -144,7 +158,18 @@ uint32_t addTensorOperands(tflite::Interpreter* interpreter, zeroPoint = tensor->params.zero_point; break; default: - FATAL("Unsupported type."); + logError("Unsupported tensor type %d", tensor->type); + return kTfLiteError; + } + if (tensor->dims->size == 0) { + logError("NNAPI doesn't support tensors with rank 0 (index %d name %s)", + i, tensor->name); + return kTfLiteError; + } + if (tensor->dims->size > 4) { + logError("NNAPI doesn't support tensors with rank > 4 (index %d name %s)", + i, tensor->name); + return kTfLiteError; } // TODO(aselle): Note, many of these are intermediate results. Do I need // to ever specify these sizes. 
@@ -154,36 +179,53 @@ uint32_t addTensorOperands(tflite::Interpreter* interpreter, ANeuralNetworksOperandType operand_type{ nn_type, static_cast<uint32_t>(tensor->dims->size), reinterpret_cast<uint32_t*>(tensor->dims->data), scale, zeroPoint}; - CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type)); + RETURN_ERROR_IF_NN_FAILED( + ANeuralNetworksModel_addOperand(nn_model, &operand_type)); // TODO(aselle): Based on Michael's suggestion, limiting this to read // only memory if (tensor->allocation_type == kTfLiteMmapRo) { if (const NNAPIAllocation* alloc = dynamic_cast<const NNAPIAllocation*>( static_cast<const Allocation*>(tensor->allocation))) { - CHECK_NN(ANeuralNetworksModel_setOperandValueFromMemory( - nn_model, next_id, alloc->memory(), alloc->offset(tensor->data.raw), - tensor->bytes)); + RETURN_ERROR_IF_NN_FAILED( + ANeuralNetworksModel_setOperandValueFromMemory( + nn_model, next_id, alloc->memory(), + alloc->offset(tensor->data.raw), tensor->bytes)); } else { - CHECK_NN(ANeuralNetworksModel_setOperandValue( + RETURN_ERROR_IF_NN_FAILED(ANeuralNetworksModel_setOperandValue( nn_model, next_id, tensor->data.raw, tensor->bytes)); } } else if (tensor->bytes == 0) { // These size 0 tensors are optional tensors reserved. - CHECK_NN( + RETURN_ERROR_IF_NN_FAILED( ANeuralNetworksModel_setOperandValue(nn_model, next_id, nullptr, 0)); } ++next_id; } - return next_id; + *no_of_operands_added = next_id; + return kTfLiteOk; +} + +void MapAndAddTensorIds(const int* from_ids_buf, size_t from_ids_count, + std::vector<uint32_t>* into, + const std::vector<int64_t>& map) { + for (size_t i = 0; i < from_ids_count; i++) { + int from_id = from_ids_buf[i]; + if (from_id == kOptionalTensor) { + into->push_back(from_id); + } else { + into->push_back(map[from_id]); + } + } } // Adds the operations and their parameters to the NN API model. // 'next-id' is the operand ID of the next operand of the model. 
-void AddOpsAndParams(tflite::Interpreter* interpreter, - ANeuralNetworksModel* nn_model, uint32_t next_id, - std::vector<int>* model_state_inputs, - std::vector<int>* model_state_outputs) { +TfLiteStatus AddOpsAndParams( + tflite::Interpreter* interpreter, ANeuralNetworksModel* nn_model, + uint32_t next_id, std::vector<int>* model_state_inputs, + std::vector<int>* model_state_outputs, + const std::vector<int64_t>& tensor_id_to_nnapi_id) { for (size_t i = 0; i < interpreter->nodes_size(); i++) { const auto* node_and_registration = interpreter->node_and_registration(i); const TfLiteNode& node = node_and_registration->first; @@ -192,10 +234,11 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, static_cast<tflite::BuiltinOperator>(registration.builtin_code); // Add the parameters. - std::vector<uint32_t> augmented_inputs( - node.inputs->data, node.inputs->data + node.inputs->size); - std::vector<uint32_t> augmented_outputs( - node.outputs->data, node.outputs->data + node.outputs->size); + std::vector<uint32_t> augmented_inputs, augmented_outputs; + MapAndAddTensorIds(node.inputs->data, node.inputs->size, &augmented_inputs, + tensor_id_to_nnapi_id); + MapAndAddTensorIds(node.outputs->data, node.outputs->size, + &augmented_outputs, tensor_id_to_nnapi_id); auto add_scalar_int32 = [&nn_model, &augmented_inputs, &next_id](int value) { @@ -244,42 +287,54 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, model_state_outputs->push_back(tensor_id); next_id++; }; + auto check_and_add_activation = [&add_scalar_int32](int activation) { + if (activation > kTfLiteActRelu6) { + FATAL("NNAPI only supports RELU, RELU1 and RELU6 activations"); + } + add_scalar_int32(activation); + }; auto add_add_params = [&add_scalar_int32](void* data) { auto* builtin = reinterpret_cast<TfLiteAddParams*>(data); + if (builtin->activation > kTfLiteActRelu6) { + FATAL("NNAPI only supports RELU, RELU1 and RELU6 activations"); + } add_scalar_int32(builtin->activation); }; - auto 
add_pooling_params = [&add_scalar_int32](void* data) { + auto add_pooling_params = [&add_scalar_int32, + &check_and_add_activation](void* data) { auto builtin = reinterpret_cast<TfLitePoolParams*>(data); add_scalar_int32(builtin->padding); add_scalar_int32(builtin->stride_width); add_scalar_int32(builtin->stride_height); add_scalar_int32(builtin->filter_width); add_scalar_int32(builtin->filter_height); - add_scalar_int32(builtin->activation); + check_and_add_activation(builtin->activation); }; - auto add_convolution_params = [&add_scalar_int32](void* data) { + auto add_convolution_params = [&add_scalar_int32, + &check_and_add_activation](void* data) { auto builtin = reinterpret_cast<TfLiteConvParams*>(data); add_scalar_int32(builtin->padding); add_scalar_int32(builtin->stride_width); add_scalar_int32(builtin->stride_height); - add_scalar_int32(builtin->activation); + check_and_add_activation(builtin->activation); }; - auto add_depthwise_conv_params = [&add_scalar_int32](void* data) { + auto add_depthwise_conv_params = [&add_scalar_int32, + &check_and_add_activation](void* data) { auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(data); add_scalar_int32(builtin->padding); add_scalar_int32(builtin->stride_width); add_scalar_int32(builtin->stride_height); add_scalar_int32(builtin->depth_multiplier); - add_scalar_int32(builtin->activation); + check_and_add_activation(builtin->activation); }; - auto add_fully_connected_params = [&add_scalar_int32](void* data) { + auto add_fully_connected_params = [&check_and_add_activation](void* data) { auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(data); - add_scalar_int32(builtin->activation); + check_and_add_activation(builtin->activation); }; auto add_concatenation_params = [&add_scalar_int32](void* data) { @@ -311,6 +366,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, // LSTM in NNAPI requires scratch tensor as an output operand. 
auto add_lstm_scratch_tensor_float32 = [interpreter, &node, &nn_model, &next_id, &augmented_outputs]() { + if (node.temporaries->size == 0) return; int scratch_buffer_index = node.temporaries->data[0]; const TfLiteTensor* tensor = interpreter->tensor(scratch_buffer_index); ANeuralNetworksOperandType operand_type{ @@ -385,7 +441,14 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, add_pooling_params(node.builtin_data); nn_op_type = ANEURALNETWORKS_L2_POOL_2D; break; - case tflite::BuiltinOperator_CONV_2D: + case tflite::BuiltinOperator_CONV_2D: { + auto builtin = reinterpret_cast<TfLiteConvParams*>(node.builtin_data); + if (builtin->dilation_width_factor != 1 || + builtin->dilation_height_factor != 1 || node.inputs->size != 3) { + logError("NNAPI does not support dilated Conv2D."); + return kTfLiteError; + } + } add_convolution_params(node.builtin_data); nn_op_type = ANEURALNETWORKS_CONV_2D; break; @@ -429,6 +492,10 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH; break; case tflite::BuiltinOperator_LSTM: { + if (node.inputs->size + /* no of params */ 3 != 21) { + logError("NNAPI only supports 21-input LSTMs"); + return kTfLiteError; + } duplicate_state_tensor_float32( node.outputs->data[/*kOutputStateTensor*/ 0]); duplicate_state_tensor_float32( @@ -467,16 +534,40 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_DIV: nnapi_version = 11; // require NNAPI 1.1 nn_op_type = ANEURALNETWORKS_DIV; + check_and_add_activation( + reinterpret_cast<TfLiteDivParams*>(node.builtin_data)->activation); break; case tflite::BuiltinOperator_SUB: nnapi_version = 11; // require NNAPI 1.1 nn_op_type = ANEURALNETWORKS_SUB; + check_and_add_activation( + reinterpret_cast<TfLiteSubParams*>(node.builtin_data)->activation); break; case tflite::BuiltinOperator_SQUEEZE: nnapi_version = 11; // requires NNAPI 1.1 add_squeeze_params(node.builtin_data); nn_op_type = ANEURALNETWORKS_SQUEEZE; break; + 
case tflite::BuiltinOperator_TRANSPOSE: + // The permutation input tensor value dictates the output dimensions. + // TODO(b/110888333): Support dynamically-sized tensors in delegates. + if ((node.inputs->size > 1) && + (interpreter->tensor(node.inputs->data[1])->allocation_type != + kTfLiteMmapRo)) { + logError("NNAPI does not yet support dynamic tensors."); + return kTfLiteError; + } + nnapi_version = 11; // require NNAPI 1.1 + nn_op_type = ANEURALNETWORKS_TRANSPOSE; + break; + case tflite::BuiltinOperator_L2_NORMALIZATION: + nn_op_type = ANEURALNETWORKS_L2_NORMALIZATION; + if (reinterpret_cast<TfLiteL2NormParams*>(node.builtin_data) + ->activation != kTfLiteActNone) { + FATAL( + "NNAPI does not support L2Normalization with fused activations"); + } + break; case tflite::BuiltinOperator_CONCAT_EMBEDDINGS: case tflite::BuiltinOperator_LSH_PROJECTION: case tflite::BuiltinOperator_HASHTABLE_LOOKUP: @@ -485,7 +576,6 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_EMBEDDING_LOOKUP_SPARSE: case tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: - case tflite::BuiltinOperator_L2_NORMALIZATION: case tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION: case tflite::BuiltinOperator_PADV2: case tflite::BuiltinOperator_RESIZE_BILINEAR: @@ -496,7 +586,6 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_SPACE_TO_BATCH_ND: case tflite::BuiltinOperator_BATCH_TO_SPACE_ND: case tflite::BuiltinOperator_TOPK_V2: - case tflite::BuiltinOperator_TRANSPOSE: case tflite::BuiltinOperator_SPLIT: case tflite::BuiltinOperator_STRIDED_SLICE: case tflite::BuiltinOperator_EXP: @@ -508,6 +597,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_MAXIMUM: case tflite::BuiltinOperator_MINIMUM: case tflite::BuiltinOperator_ARG_MAX: + case tflite::BuiltinOperator_ARG_MIN: case tflite::BuiltinOperator_GREATER: case 
tflite::BuiltinOperator_GREATER_EQUAL: case tflite::BuiltinOperator_LESS: @@ -524,16 +614,21 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_EQUAL: case tflite::BuiltinOperator_NOT_EQUAL: case tflite::BuiltinOperator_SUM: + case tflite::BuiltinOperator_REDUCE_MAX: + case tflite::BuiltinOperator_REDUCE_PROD: case tflite::BuiltinOperator_SQRT: case tflite::BuiltinOperator_RSQRT: case tflite::BuiltinOperator_SHAPE: case tflite::BuiltinOperator_POW: - FATAL("Op code %d is currently not delegated to NNAPI", builtin); - nn_op_type = -1; // set to invalid + case tflite::BuiltinOperator_FAKE_QUANT: + case tflite::BuiltinOperator_PACK: + case tflite::BuiltinOperator_LOGICAL_OR: + logError("Op code %d is currently not delegated to NNAPI", builtin); + return kTfLiteError; break; case tflite::BuiltinOperator_CUSTOM: - FATAL("Custom operations are not supported when using NNAPI."); - nn_op_type = -1; // set to invalid + logError("Custom operations are not supported when using NNAPI."); + return kTfLiteError; break; } @@ -542,47 +637,70 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, } // Add the operation. - CHECK_NN(ANeuralNetworksModel_addOperation( + RETURN_ERROR_IF_NN_FAILED(ANeuralNetworksModel_addOperation( nn_model, nn_op_type, static_cast<uint32_t>(augmented_inputs.size()), augmented_inputs.data(), static_cast<uint32_t>(augmented_outputs.size()), reinterpret_cast<uint32_t*>(augmented_outputs.data()))); } + return kTfLiteOk; } TfLiteStatus NNAPIDelegate::BuildGraph(Interpreter* interpreter) { - // TODO(aselle): This is not correct. need to handle resize invalidation. - if (nn_model_ && nn_compiled_model_) return kTfLiteOk; + if (nn_model_ && nn_compiled_model_) return model_status_; + // TODO(aselle): This is not correct. need to handle resize invalidation. if (!nn_model_) { CHECK_NN(ANeuralNetworksModel_create(&nn_model_)); - // Find all the temporary tensors and put them in a skip_list. 
- std::vector<uint32_t> skip_list; + // Find which tensors should be added to NNAPI. TFLite has temporaries + // and RNN back-edges which are are not valid for NNAPI. We look through all + // inputs and outputs and mark the mapping in tensor_id_to_nnapi_id with + // kOperandIdNotSet. addTensorOperands will replace those with the + // corresponding NNAPI operand ids and skip kOperandNotNeeded entries. + std::vector<int64_t> tensor_id_to_nnapi_id(interpreter->tensors_size(), + kOperandNotNeeded); + auto set_ids_to_not_set = [&tensor_id_to_nnapi_id](const int* buf, + size_t count) { + for (int j = 0; j < count; j++) { + auto tensor_id = buf[j]; + if (tensor_id != kOptionalTensor) { + tensor_id_to_nnapi_id[tensor_id] = kOperandIdNotSet; + } + } + }; for (size_t i = 0; i < interpreter->nodes_size(); i++) { const auto* node_and_registration = interpreter->node_and_registration(i); const TfLiteNode& node = node_and_registration->first; - if (node.temporaries != nullptr) { - for (int j = 0; j < node.temporaries->size; j++) { - skip_list.push_back(static_cast<uint32_t>(node.temporaries->data[j])); - } - } + set_ids_to_not_set(node.inputs->data, node.inputs->size); + set_ids_to_not_set(node.outputs->data, node.outputs->size); } - - uint32_t next_id = addTensorOperands(interpreter, nn_model_, skip_list); - AddOpsAndParams(interpreter, nn_model_, next_id, &model_states_inputs_, - &model_states_outputs_); - - std::vector<int> augmented_inputs = interpreter->inputs(); - std::vector<int> augmented_outputs = interpreter->outputs(); - - // All state tensors input/output need to be treated as model input/output. 
+ set_ids_to_not_set(interpreter->inputs().data(), + interpreter->inputs().size()); + set_ids_to_not_set(interpreter->outputs().data(), + interpreter->outputs().size()); + + uint32_t next_id = 0; + RETURN_ERROR_IF_NN_FAILED(addTensorOperands( + interpreter, nn_model_, &next_id, &tensor_id_to_nnapi_id)); + RETURN_ERROR_IF_NN_FAILED( + AddOpsAndParams(interpreter, nn_model_, next_id, &model_states_inputs_, + &model_states_outputs_, tensor_id_to_nnapi_id)); + + std::vector<uint32_t> augmented_inputs; + MapAndAddTensorIds(interpreter->inputs().data(), + interpreter->inputs().size(), &augmented_inputs, + tensor_id_to_nnapi_id); augmented_inputs.insert(augmented_inputs.end(), model_states_inputs_.begin(), model_states_inputs_.end()); - augmented_outputs.insert(augmented_outputs.end(), - model_states_outputs_.begin(), - model_states_outputs_.end()); + std::vector<uint32_t> augmented_outputs; + MapAndAddTensorIds(interpreter->outputs().data(), + interpreter->outputs().size(), &augmented_outputs, + tensor_id_to_nnapi_id); + MapAndAddTensorIds(model_states_outputs_.data(), + model_states_outputs_.size(), &augmented_outputs, + tensor_id_to_nnapi_id); CHECK_NN(ANeuralNetworksModel_identifyInputsAndOutputs( nn_model_, static_cast<uint32_t>(augmented_inputs.size()), @@ -600,7 +718,13 @@ TfLiteStatus NNAPIDelegate::BuildGraph(Interpreter* interpreter) { TfLiteStatus NNAPIDelegate::Invoke(Interpreter* interpreter) { if (!nn_model_) { - TF_LITE_ENSURE_STATUS(BuildGraph(interpreter)); + model_status_ = BuildGraph(interpreter); + if (model_status_ != kTfLiteOk) { + logError("Failed to build graph for NNAPI"); + } + } + if (model_status_ != kTfLiteOk) { + return model_status_; } ANeuralNetworksExecution* execution = nullptr; |