path: root/tensorflow/contrib/lite/delegates/nnapi
author    A. Unique TensorFlower <gardener@tensorflow.org>  2018-08-10 16:03:54 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>   2018-08-10 16:13:31 -0700
commit    bdc3b303f1ba5b22cac1df0b605ad0e0c45421f1 (patch)
tree      b41d0ddbc4ad15d24a731957db90c9457f602fe6 /tensorflow/contrib/lite/delegates/nnapi
parent    be3f9abf85cb8f0a80aa034ec8bfb6b5844fd3e6 (diff)
Use shared memory for NNAPI input and output.
PiperOrigin-RevId: 208283489
Diffstat (limited to 'tensorflow/contrib/lite/delegates/nnapi')
-rw-r--r--  tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc | 81
1 file changed, 77 insertions(+), 4 deletions(-)
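
Before the diff itself, a quick orientation: the change stops passing raw host pointers to NNAPI via ANeuralNetworksExecution_setInput/setOutput and instead stages all input and output tensors in two shared-memory pools that the NNAPI driver can map directly. The sketch below shows only the underlying primitives and is not part of the patch; the function name CreateExamplePool, the pool name "example_pool", and the use of plain NDK headers (the patch reaches these entry points through NeuralNetworksShim.h) are illustrative assumptions, and error handling is omitted.

    #include <android/NeuralNetworks.h>  // ANeuralNetworksMemory_*
    #include <android/sharedmem.h>       // ASharedMemory_create
    #include <sys/mman.h>                // mmap, MAP_SHARED, PROT_READ, PROT_WRITE
    #include <unistd.h>                  // close
    #include <cstdint>

    // Create an ashmem-backed pool visible both to this process (via mmap) and
    // to the NNAPI driver (via ANeuralNetworksMemory_createFromFd on the same fd).
    ANeuralNetworksMemory* CreateExamplePool(size_t pool_bytes, int* out_fd,
                                             uint8_t** out_data) {
      int fd = ASharedMemory_create("example_pool", pool_bytes);
      uint8_t* data = reinterpret_cast<uint8_t*>(
          mmap(nullptr, pool_bytes, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0));
      ANeuralNetworksMemory* nn_memory = nullptr;
      ANeuralNetworksMemory_createFromFd(pool_bytes, PROT_READ | PROT_WRITE, fd,
                                         0, &nn_memory);
      *out_fd = fd;
      *out_data = data;
      return nn_memory;
    }

The NNMemory class added in the diff below wraps exactly this triple (ashmem fd, mapped pointer, ANeuralNetworksMemory handle) and releases it in its destructor.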
diff --git a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc
index 17fa120cf9..e6cc3dd99c 100644
--- a/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc
+++ b/tensorflow/contrib/lite/delegates/nnapi/nnapi_delegate.cc
@@ -27,7 +27,9 @@ limitations under the License.
#include "tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h"
#ifdef __ANDROID__
+#include <sys/mman.h>
#include <sys/system_properties.h>
+#include <unistd.h>
#endif
namespace tflite {
@@ -80,6 +82,44 @@ struct NNFreeCompilation {
}
};
+// Manages a shared memory region used for NNAPI execution inputs and outputs.
+class NNMemory {
+ public:
+ NNMemory(const char* name, size_t size) {
+#ifdef __ANDROID__
+ byte_size_ = size;
+ fd_ = ASharedMemory_create(name, size);
+ data_ptr_ = reinterpret_cast<uint8_t*>(
+ mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0));
+ ANeuralNetworksMemory_createFromFd(size, PROT_READ | PROT_WRITE, fd_, 0,
+ &nn_memory_handle_);
+#endif
+ }
+
+ ~NNMemory() {
+#ifdef __ANDROID__
+ if (data_ptr_) {
+ munmap(data_ptr_, byte_size_);
+ }
+ if (nn_memory_handle_) {
+ ANeuralNetworksMemory_free(nn_memory_handle_);
+ }
+ if (fd_ > 0) close(fd_);
+#endif
+ }
+
+ ANeuralNetworksMemory* get_handle() { return nn_memory_handle_; }
+ uint8_t* get_data_ptr() { return data_ptr_; }
+
+ private:
+#ifdef __ANDROID__
+ int fd_ = 0;
+ size_t byte_size_ = 0;
+#endif
+ uint8_t* data_ptr_ = nullptr;
+ ANeuralNetworksMemory* nn_memory_handle_ = nullptr;
+};
+
// Track tensor indices to NN API tensor indices mapping.
class OperandMapping {
public:
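
A minimal usage sketch for the NNMemory wrapper added above, assuming a live ANeuralNetworksExecution* named execution and two runtime input tensors of 600 and 1024 bytes; the names and sizes are hypothetical, not from the patch (the real call sites are in the Invoke changes below):

    // Pool sized for two hypothetical runtime inputs (600 + 1024 bytes).
    NNMemory input_pool("input_pool", 600 + 1024);

    // Stage each tensor's bytes at a running offset inside the pool...
    memcpy(input_pool.get_data_ptr() + 0, tensor0_bytes, 600);
    memcpy(input_pool.get_data_ptr() + 600, tensor1_bytes, 1024);

    // ...then bind each NNAPI input as a (memory handle, offset, length) triple
    // instead of a raw host pointer.
    ANeuralNetworksExecution_setInputFromMemory(execution, /*index=*/0, nullptr,
                                                input_pool.get_handle(),
                                                /*offset=*/0, /*length=*/600);
    ANeuralNetworksExecution_setInputFromMemory(execution, /*index=*/1, nullptr,
                                                input_pool.get_handle(),
                                                /*offset=*/600, /*length=*/1024);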
@@ -911,6 +951,8 @@ class NNAPIDelegateKernel {
// absolute indices but NN api indexes inputs by relative indices.
int relative_input_index = 0;
int num_optional_tensors = 0;
+
+ size_t input_offset = 0;
for (auto absolute_input_index : TfLiteIntArrayView(node->inputs)) {
if (absolute_input_index == kOptionalTensor) {
num_optional_tensors++;
@@ -920,20 +962,28 @@ class NNAPIDelegateKernel {
// TODO(miaowang): make sure the delegation works with dequantized weights
// as intermediate tensors.
if (tensor->allocation_type != kTfLiteMmapRo) {
- CHECK_NN(context, ANeuralNetworksExecution_setInput(
+ // Copy the input tensor's data into the pre-allocated shared memory pool.
+ memcpy(nn_input_memory_->get_data_ptr() + input_offset,
+ tensor->data.raw, tensor->bytes);
+ CHECK_NN(context, ANeuralNetworksExecution_setInputFromMemory(
execution, relative_input_index, nullptr,
- tensor->data.raw, tensor->bytes));
+ nn_input_memory_->get_handle(), input_offset,
+ tensor->bytes));
+ input_offset += tensor->bytes;
relative_input_index++;
}
}
// Set the output tensor buffers.
int relative_output_index = 0;
+ size_t output_offset = 0;
for (auto output_index : TfLiteIntArrayView(node->outputs)) {
TfLiteTensor* tensor = &context->tensors[output_index];
- CHECK_NN(context, ANeuralNetworksExecution_setOutput(
+ CHECK_NN(context, ANeuralNetworksExecution_setOutputFromMemory(
execution, relative_output_index, nullptr,
- tensor->data.raw, tensor->bytes));
+ nn_output_memory_->get_handle(), output_offset,
+ tensor->bytes));
+ output_offset += tensor->bytes;
relative_output_index++;
}
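
The output path mirrors the input path: outputs are bound at running offsets into a second pool before compute, and (as the next hunk shows) the same offsets are used afterwards to copy the results back into the TfLite tensors. A hedged sketch with hypothetical sizes and names (output_pool, out0, out1 are not from the patch):

    // Two hypothetical outputs of 4004 and 16 bytes, packed back to back.
    ANeuralNetworksExecution_setOutputFromMemory(execution, /*index=*/0, nullptr,
                                                 output_pool.get_handle(),
                                                 /*offset=*/0, /*length=*/4004);
    ANeuralNetworksExecution_setOutputFromMemory(execution, /*index=*/1, nullptr,
                                                 output_pool.get_handle(),
                                                 /*offset=*/4004, /*length=*/16);
    // ... ANeuralNetworksExecution_startCompute + ANeuralNetworksEvent_wait ...
    memcpy(out0->data.raw, output_pool.get_data_ptr() + 0, 4004);
    memcpy(out1->data.raw, output_pool.get_data_ptr() + 4004, 16);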
@@ -957,6 +1007,15 @@ class NNAPIDelegateKernel {
ANeuralNetworksEvent_free(event);
ANeuralNetworksExecution_free(execution);
+ // Copy the results from shared memory back into the TfLite output tensors.
+ output_offset = 0;
+ for (auto output_index : TfLiteIntArrayView(node->outputs)) {
+ TfLiteTensor* tensor = &context->tensors[output_index];
+ memcpy(tensor->data.raw,
+ nn_output_memory_->get_data_ptr() + output_offset, tensor->bytes);
+ output_offset += tensor->bytes;
+ }
+
return kTfLiteOk;
}
@@ -974,6 +1033,9 @@ class NNAPIDelegateKernel {
std::vector<int> model_state_inputs_;
std::vector<int> model_state_tfl_outputs_;
+ std::unique_ptr<NNMemory> nn_input_memory_;
+ std::unique_ptr<NNMemory> nn_output_memory_;
+
TfLiteStatus AddOpsAndTensors(TfLiteContext* context) {
// The operand builder allows creating a single op. We create it outside the
// for loop to avoid reallocating
@@ -1024,21 +1086,27 @@ class NNAPIDelegateKernel {
inputs.reserve(input_tensors->size);
std::vector<uint32_t> outputs;
outputs.reserve(output_tensors->size);
+
+ size_t total_input_byte_size = 0;
// Map the TensorFlow Lite input and output tensor indices to NN API indices.
for (int i : TfLiteIntArrayView(input_tensors)) {
// Constant tensors are not NNAPI inputs.
if (i != kOptionalTensor &&
context->tensors[i].allocation_type != kTfLiteMmapRo) {
inputs.push_back(operand_mapping_.lite_index_to_ann(i));
+ total_input_byte_size += context->tensors[i].bytes;
}
}
+
// Add state input tensors as model inputs
for (int i : model_state_inputs_) {
inputs.push_back(i);
}
+ size_t total_output_byte_size = 0;
for (int i : TfLiteIntArrayView(output_tensors)) {
outputs.push_back(operand_mapping_.lite_index_to_ann(i));
+ total_output_byte_size += context->tensors[i].bytes;
}
// Tell ANN to declare inputs/outputs
@@ -1048,6 +1116,11 @@ class NNAPIDelegateKernel {
// Finalize the model
CHECK_NN(context, ANeuralNetworksModel_finish(nn_model_.get()));
+ // Create the shared memory pools for inputs and outputs.
+ nn_input_memory_.reset(new NNMemory("input_pool", total_input_byte_size));
+ nn_output_memory_.reset(
+ new NNMemory("output_pool", total_output_byte_size));
+
return kTfLiteOk;
}
};