diff options
Diffstat (limited to 'tensorflow/contrib/lite/nnapi_delegate.cc')
-rw-r--r-- | tensorflow/contrib/lite/nnapi_delegate.cc | 276 |
1 file changed, 200 insertions, 76 deletions
diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index 7627d89c09..551e8ed320 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -29,27 +29,46 @@ limitations under the License. namespace tflite { -// TODO(aselle): FATAL leaves resources hanging. -void FATAL(const char* format, ...) { +void logError(const char* format, ...) { + // TODO(mikie): use android logging, stderr is not captured for Java + // applications va_list args; va_start(args, format); vfprintf(stderr, format, args); va_end(args); + fprintf(stderr, "\n"); fflush(stderr); - exit(1); } +#define FATAL(...) \ + logError(__VA_ARGS__); \ + exit(1); + // TODO(aselle): Change the error model to use status codes. -#define CHECK_TFLITE_SUCCESS(x) \ - if (x != kTfLiteOk) { \ - FATAL("Aborting since tflite returned failure."); \ +#define CHECK_TFLITE_SUCCESS(x) \ + if (x != kTfLiteOk) { \ + FATAL("Aborting since tflite returned failure nnapi_delegate.cc:%d.", \ + __LINE__); \ } -#define CHECK_NN(x) \ - if (x != ANEURALNETWORKS_NO_ERROR) { \ - FATAL("Aborting since tflite returned failure."); \ +#define CHECK_NN(x) \ + if (x != ANEURALNETWORKS_NO_ERROR) { \ + FATAL("Aborting since NNAPI returned failure nnapi_delegate.cc:%d", \ + __LINE__); \ } +#define RETURN_ERROR_IF_NN_FAILED(x) \ + if (x != ANEURALNETWORKS_NO_ERROR) { \ + logError( \ + "Returning error since NNAPI returned failure nnapi_delegate.cc:%d.", \ + __LINE__); \ + return kTfLiteError; \ + } + +// Tracking of NNAPI operand ids +static const int64_t kOperandIdNotSet = -1; +static const int64_t kOperandNotNeeded = -2; + namespace { int32_t GetAndroidSdkVersion() { @@ -104,21 +123,16 @@ NNAPIDelegate::~NNAPIDelegate() { } // Adds the tensors of the interpreter to the NN API model. -// Returns the number of operands added. 
-uint32_t addTensorOperands(tflite::Interpreter* interpreter, - ANeuralNetworksModel* nn_model, - const std::vector<uint32_t>& skip_list) { +TfLiteStatus addTensorOperands(tflite::Interpreter* interpreter, + ANeuralNetworksModel* nn_model, + uint32_t* no_of_operands_added, + std::vector<int64_t>* nnapi_ids) { uint32_t next_id = 0; for (size_t i = 0; i < interpreter->tensors_size(); i++) { - // skip temporaries tensors. - bool shouldSkip = false; - for (auto skip_idx : skip_list) { - if (i == skip_idx) { - shouldSkip = true; - break; - } - } - if (shouldSkip) continue; + // Skip temporaries and RNN back-edges. + if ((*nnapi_ids)[i] == kOperandNotNeeded) continue; + + (*nnapi_ids)[i] = int64_t(next_id); int32_t nn_type = 0; // NNAPI requires 32-bit float scale to be zero, tflite doesn't care @@ -144,7 +158,18 @@ uint32_t addTensorOperands(tflite::Interpreter* interpreter, zeroPoint = tensor->params.zero_point; break; default: - FATAL("Unsupported type."); + logError("Unsupported tensor type %d", tensor->type); + return kTfLiteError; + } + if (tensor->dims->size == 0) { + logError("NNAPI doesn't support tensors with rank 0 (index %d name %s)", + i, tensor->name); + return kTfLiteError; + } + if (tensor->dims->size > 4) { + logError("NNAPI doesn't support tensors with rank > 4 (index %d name %s)", + i, tensor->name); + return kTfLiteError; } // TODO(aselle): Note, many of these are intermediate results. Do I need // to ever specify these sizes. 
@@ -154,36 +179,53 @@ uint32_t addTensorOperands(tflite::Interpreter* interpreter, ANeuralNetworksOperandType operand_type{ nn_type, static_cast<uint32_t>(tensor->dims->size), reinterpret_cast<uint32_t*>(tensor->dims->data), scale, zeroPoint}; - CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type)); + RETURN_ERROR_IF_NN_FAILED( + ANeuralNetworksModel_addOperand(nn_model, &operand_type)); // TODO(aselle): Based on Michael's suggestion, limiting this to read // only memory if (tensor->allocation_type == kTfLiteMmapRo) { if (const NNAPIAllocation* alloc = dynamic_cast<const NNAPIAllocation*>( static_cast<const Allocation*>(tensor->allocation))) { - CHECK_NN(ANeuralNetworksModel_setOperandValueFromMemory( - nn_model, next_id, alloc->memory(), alloc->offset(tensor->data.raw), - tensor->bytes)); + RETURN_ERROR_IF_NN_FAILED( + ANeuralNetworksModel_setOperandValueFromMemory( + nn_model, next_id, alloc->memory(), + alloc->offset(tensor->data.raw), tensor->bytes)); } else { - CHECK_NN(ANeuralNetworksModel_setOperandValue( + RETURN_ERROR_IF_NN_FAILED(ANeuralNetworksModel_setOperandValue( nn_model, next_id, tensor->data.raw, tensor->bytes)); } } else if (tensor->bytes == 0) { // These size 0 tensors are optional tensors reserved. - CHECK_NN( + RETURN_ERROR_IF_NN_FAILED( ANeuralNetworksModel_setOperandValue(nn_model, next_id, nullptr, 0)); } ++next_id; } - return next_id; + *no_of_operands_added = next_id; + return kTfLiteOk; +} + +void MapAndAddTensorIds(const int* from_ids_buf, size_t from_ids_count, + std::vector<uint32_t>* into, + const std::vector<int64_t>& map) { + for (size_t i = 0; i < from_ids_count; i++) { + int from_id = from_ids_buf[i]; + if (from_id == kOptionalTensor) { + into->push_back(from_id); + } else { + into->push_back(map[from_id]); + } + } } // Adds the operations and their parameters to the NN API model. // 'next-id' is the operand ID of the next operand of the model. 
-void AddOpsAndParams(tflite::Interpreter* interpreter, - ANeuralNetworksModel* nn_model, uint32_t next_id, - std::vector<int>* model_state_inputs, - std::vector<int>* model_state_outputs) { +TfLiteStatus AddOpsAndParams( + tflite::Interpreter* interpreter, ANeuralNetworksModel* nn_model, + uint32_t next_id, std::vector<int>* model_state_inputs, + std::vector<int>* model_state_outputs, + const std::vector<int64_t>& tensor_id_to_nnapi_id) { for (size_t i = 0; i < interpreter->nodes_size(); i++) { const auto* node_and_registration = interpreter->node_and_registration(i); const TfLiteNode& node = node_and_registration->first; @@ -192,10 +234,11 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, static_cast<tflite::BuiltinOperator>(registration.builtin_code); // Add the parameters. - std::vector<uint32_t> augmented_inputs( - node.inputs->data, node.inputs->data + node.inputs->size); - std::vector<uint32_t> augmented_outputs( - node.outputs->data, node.outputs->data + node.outputs->size); + std::vector<uint32_t> augmented_inputs, augmented_outputs; + MapAndAddTensorIds(node.inputs->data, node.inputs->size, &augmented_inputs, + tensor_id_to_nnapi_id); + MapAndAddTensorIds(node.outputs->data, node.outputs->size, + &augmented_outputs, tensor_id_to_nnapi_id); auto add_scalar_int32 = [&nn_model, &augmented_inputs, &next_id](int value) { @@ -244,42 +287,54 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, model_state_outputs->push_back(tensor_id); next_id++; }; + auto check_and_add_activation = [&add_scalar_int32](int activation) { + if (activation > kTfLiteActRelu6) { + FATAL("NNAPI only supports RELU, RELU1 and RELU6 activations"); + } + add_scalar_int32(activation); + }; auto add_add_params = [&add_scalar_int32](void* data) { auto* builtin = reinterpret_cast<TfLiteAddParams*>(data); + if (builtin->activation > kTfLiteActRelu6) { + FATAL("NNAPI only supports RELU, RELU1 and RELU6 activations"); + } add_scalar_int32(builtin->activation); }; - auto 
add_pooling_params = [&add_scalar_int32](void* data) { + auto add_pooling_params = [&add_scalar_int32, + &check_and_add_activation](void* data) { auto builtin = reinterpret_cast<TfLitePoolParams*>(data); add_scalar_int32(builtin->padding); add_scalar_int32(builtin->stride_width); add_scalar_int32(builtin->stride_height); add_scalar_int32(builtin->filter_width); add_scalar_int32(builtin->filter_height); - add_scalar_int32(builtin->activation); + check_and_add_activation(builtin->activation); }; - auto add_convolution_params = [&add_scalar_int32](void* data) { + auto add_convolution_params = [&add_scalar_int32, + &check_and_add_activation](void* data) { auto builtin = reinterpret_cast<TfLiteConvParams*>(data); add_scalar_int32(builtin->padding); add_scalar_int32(builtin->stride_width); add_scalar_int32(builtin->stride_height); - add_scalar_int32(builtin->activation); + check_and_add_activation(builtin->activation); }; - auto add_depthwise_conv_params = [&add_scalar_int32](void* data) { + auto add_depthwise_conv_params = [&add_scalar_int32, + &check_and_add_activation](void* data) { auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(data); add_scalar_int32(builtin->padding); add_scalar_int32(builtin->stride_width); add_scalar_int32(builtin->stride_height); add_scalar_int32(builtin->depth_multiplier); - add_scalar_int32(builtin->activation); + check_and_add_activation(builtin->activation); }; - auto add_fully_connected_params = [&add_scalar_int32](void* data) { + auto add_fully_connected_params = [&check_and_add_activation](void* data) { auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(data); - add_scalar_int32(builtin->activation); + check_and_add_activation(builtin->activation); }; auto add_concatenation_params = [&add_scalar_int32](void* data) { @@ -311,6 +366,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, // LSTM in NNAPI requires scratch tensor as an output operand. 
auto add_lstm_scratch_tensor_float32 = [interpreter, &node, &nn_model, &next_id, &augmented_outputs]() { + if (node.temporaries->size == 0) return; int scratch_buffer_index = node.temporaries->data[0]; const TfLiteTensor* tensor = interpreter->tensor(scratch_buffer_index); ANeuralNetworksOperandType operand_type{ @@ -385,7 +441,14 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, add_pooling_params(node.builtin_data); nn_op_type = ANEURALNETWORKS_L2_POOL_2D; break; - case tflite::BuiltinOperator_CONV_2D: + case tflite::BuiltinOperator_CONV_2D: { + auto builtin = reinterpret_cast<TfLiteConvParams*>(node.builtin_data); + if (builtin->dilation_width_factor != 1 || + builtin->dilation_height_factor != 1 || node.inputs->size != 3) { + logError("NNAPI does not support dilated Conv2D."); + return kTfLiteError; + } + } add_convolution_params(node.builtin_data); nn_op_type = ANEURALNETWORKS_CONV_2D; break; @@ -429,6 +492,10 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH; break; case tflite::BuiltinOperator_LSTM: { + if (node.inputs->size + /* no of params */ 3 != 21) { + logError("NNAPI only supports 21-input LSTMs"); + return kTfLiteError; + } duplicate_state_tensor_float32( node.outputs->data[/*kOutputStateTensor*/ 0]); duplicate_state_tensor_float32( @@ -467,16 +534,40 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_DIV: nnapi_version = 11; // require NNAPI 1.1 nn_op_type = ANEURALNETWORKS_DIV; + check_and_add_activation( + reinterpret_cast<TfLiteDivParams*>(node.builtin_data)->activation); break; case tflite::BuiltinOperator_SUB: nnapi_version = 11; // require NNAPI 1.1 nn_op_type = ANEURALNETWORKS_SUB; + check_and_add_activation( + reinterpret_cast<TfLiteSubParams*>(node.builtin_data)->activation); break; case tflite::BuiltinOperator_SQUEEZE: nnapi_version = 11; // requires NNAPI 1.1 add_squeeze_params(node.builtin_data); nn_op_type = ANEURALNETWORKS_SQUEEZE; break; + 
case tflite::BuiltinOperator_TRANSPOSE: + // The permutation input tensor value dictates the output dimensions. + // TODO(b/110888333): Support dynamically-sized tensors in delegates. + if ((node.inputs->size > 1) && + (interpreter->tensor(node.inputs->data[1])->allocation_type != + kTfLiteMmapRo)) { + logError("NNAPI does not yet support dynamic tensors."); + return kTfLiteError; + } + nnapi_version = 11; // require NNAPI 1.1 + nn_op_type = ANEURALNETWORKS_TRANSPOSE; + break; + case tflite::BuiltinOperator_L2_NORMALIZATION: + nn_op_type = ANEURALNETWORKS_L2_NORMALIZATION; + if (reinterpret_cast<TfLiteL2NormParams*>(node.builtin_data) + ->activation != kTfLiteActNone) { + FATAL( + "NNAPI does not support L2Normalization with fused activations"); + } + break; case tflite::BuiltinOperator_CONCAT_EMBEDDINGS: case tflite::BuiltinOperator_LSH_PROJECTION: case tflite::BuiltinOperator_HASHTABLE_LOOKUP: @@ -485,7 +576,6 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_EMBEDDING_LOOKUP_SPARSE: case tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM: case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM: - case tflite::BuiltinOperator_L2_NORMALIZATION: case tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION: case tflite::BuiltinOperator_PADV2: case tflite::BuiltinOperator_RESIZE_BILINEAR: @@ -496,7 +586,6 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_SPACE_TO_BATCH_ND: case tflite::BuiltinOperator_BATCH_TO_SPACE_ND: case tflite::BuiltinOperator_TOPK_V2: - case tflite::BuiltinOperator_TRANSPOSE: case tflite::BuiltinOperator_SPLIT: case tflite::BuiltinOperator_STRIDED_SLICE: case tflite::BuiltinOperator_EXP: @@ -508,6 +597,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_MAXIMUM: case tflite::BuiltinOperator_MINIMUM: case tflite::BuiltinOperator_ARG_MAX: + case tflite::BuiltinOperator_ARG_MIN: case tflite::BuiltinOperator_GREATER: case 
tflite::BuiltinOperator_GREATER_EQUAL: case tflite::BuiltinOperator_LESS: @@ -524,16 +614,21 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_EQUAL: case tflite::BuiltinOperator_NOT_EQUAL: case tflite::BuiltinOperator_SUM: + case tflite::BuiltinOperator_REDUCE_MAX: + case tflite::BuiltinOperator_REDUCE_PROD: case tflite::BuiltinOperator_SQRT: case tflite::BuiltinOperator_RSQRT: case tflite::BuiltinOperator_SHAPE: case tflite::BuiltinOperator_POW: - FATAL("Op code %d is currently not delegated to NNAPI", builtin); - nn_op_type = -1; // set to invalid + case tflite::BuiltinOperator_FAKE_QUANT: + case tflite::BuiltinOperator_PACK: + case tflite::BuiltinOperator_LOGICAL_OR: + logError("Op code %d is currently not delegated to NNAPI", builtin); + return kTfLiteError; break; case tflite::BuiltinOperator_CUSTOM: - FATAL("Custom operations are not supported when using NNAPI."); - nn_op_type = -1; // set to invalid + logError("Custom operations are not supported when using NNAPI."); + return kTfLiteError; break; } @@ -542,47 +637,70 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, } // Add the operation. - CHECK_NN(ANeuralNetworksModel_addOperation( + RETURN_ERROR_IF_NN_FAILED(ANeuralNetworksModel_addOperation( nn_model, nn_op_type, static_cast<uint32_t>(augmented_inputs.size()), augmented_inputs.data(), static_cast<uint32_t>(augmented_outputs.size()), reinterpret_cast<uint32_t*>(augmented_outputs.data()))); } + return kTfLiteOk; } TfLiteStatus NNAPIDelegate::BuildGraph(Interpreter* interpreter) { - // TODO(aselle): This is not correct. need to handle resize invalidation. - if (nn_model_ && nn_compiled_model_) return kTfLiteOk; + if (nn_model_ && nn_compiled_model_) return model_status_; + // TODO(aselle): This is not correct. need to handle resize invalidation. if (!nn_model_) { CHECK_NN(ANeuralNetworksModel_create(&nn_model_)); - // Find all the temporary tensors and put them in a skip_list. 
- std::vector<uint32_t> skip_list; + // Find which tensors should be added to NNAPI. TFLite has temporaries + // and RNN back-edges which are are not valid for NNAPI. We look through all + // inputs and outputs and mark the mapping in tensor_id_to_nnapi_id with + // kOperandIdNotSet. addTensorOperands will replace those with the + // corresponding NNAPI operand ids and skip kOperandNotNeeded entries. + std::vector<int64_t> tensor_id_to_nnapi_id(interpreter->tensors_size(), + kOperandNotNeeded); + auto set_ids_to_not_set = [&tensor_id_to_nnapi_id](const int* buf, + size_t count) { + for (int j = 0; j < count; j++) { + auto tensor_id = buf[j]; + if (tensor_id != kOptionalTensor) { + tensor_id_to_nnapi_id[tensor_id] = kOperandIdNotSet; + } + } + }; for (size_t i = 0; i < interpreter->nodes_size(); i++) { const auto* node_and_registration = interpreter->node_and_registration(i); const TfLiteNode& node = node_and_registration->first; - if (node.temporaries != nullptr) { - for (int j = 0; j < node.temporaries->size; j++) { - skip_list.push_back(static_cast<uint32_t>(node.temporaries->data[j])); - } - } + set_ids_to_not_set(node.inputs->data, node.inputs->size); + set_ids_to_not_set(node.outputs->data, node.outputs->size); } - - uint32_t next_id = addTensorOperands(interpreter, nn_model_, skip_list); - AddOpsAndParams(interpreter, nn_model_, next_id, &model_states_inputs_, - &model_states_outputs_); - - std::vector<int> augmented_inputs = interpreter->inputs(); - std::vector<int> augmented_outputs = interpreter->outputs(); - - // All state tensors input/output need to be treated as model input/output. 
+ set_ids_to_not_set(interpreter->inputs().data(), + interpreter->inputs().size()); + set_ids_to_not_set(interpreter->outputs().data(), + interpreter->outputs().size()); + + uint32_t next_id = 0; + RETURN_ERROR_IF_NN_FAILED(addTensorOperands( + interpreter, nn_model_, &next_id, &tensor_id_to_nnapi_id)); + RETURN_ERROR_IF_NN_FAILED( + AddOpsAndParams(interpreter, nn_model_, next_id, &model_states_inputs_, + &model_states_outputs_, tensor_id_to_nnapi_id)); + + std::vector<uint32_t> augmented_inputs; + MapAndAddTensorIds(interpreter->inputs().data(), + interpreter->inputs().size(), &augmented_inputs, + tensor_id_to_nnapi_id); augmented_inputs.insert(augmented_inputs.end(), model_states_inputs_.begin(), model_states_inputs_.end()); - augmented_outputs.insert(augmented_outputs.end(), - model_states_outputs_.begin(), - model_states_outputs_.end()); + std::vector<uint32_t> augmented_outputs; + MapAndAddTensorIds(interpreter->outputs().data(), + interpreter->outputs().size(), &augmented_outputs, + tensor_id_to_nnapi_id); + MapAndAddTensorIds(model_states_outputs_.data(), + model_states_outputs_.size(), &augmented_outputs, + tensor_id_to_nnapi_id); CHECK_NN(ANeuralNetworksModel_identifyInputsAndOutputs( nn_model_, static_cast<uint32_t>(augmented_inputs.size()), @@ -600,7 +718,13 @@ TfLiteStatus NNAPIDelegate::BuildGraph(Interpreter* interpreter) { TfLiteStatus NNAPIDelegate::Invoke(Interpreter* interpreter) { if (!nn_model_) { - TF_LITE_ENSURE_STATUS(BuildGraph(interpreter)); + model_status_ = BuildGraph(interpreter); + if (model_status_ != kTfLiteOk) { + logError("Failed to build graph for NNAPI"); + } + } + if (model_status_ != kTfLiteOk) { + return model_status_; } ANeuralNetworksExecution* execution = nullptr; |