author    A. Unique TensorFlower <gardener@tensorflow.org>  2018-01-13 16:21:55 -0800
committer TensorFlower Gardener <gardener@tensorflow.org>   2018-01-13 16:25:51 -0800
commit e6ff665dbe4888aa5fdff8f34c44405acca2ddd1 (patch)
tree   a290fe97b76bebec82ef3b3a76c906acf6b55f41 /tensorflow/contrib
parent a4973345351a14a786987cd7f648a99c029fdc1d (diff)
Clean up the allocation logic in the interpreter.
PiperOrigin-RevId: 181865795
Diffstat (limited to 'tensorflow/contrib')
-rw-r--r--  tensorflow/contrib/lite/BUILD                  |  49
-rw-r--r--  tensorflow/contrib/lite/arena_planner.cc       | 247
-rw-r--r--  tensorflow/contrib/lite/arena_planner.h        | 107
-rw-r--r--  tensorflow/contrib/lite/arena_planner_test.cc  | 472
-rw-r--r--  tensorflow/contrib/lite/graph_info.h           |  53
-rw-r--r--  tensorflow/contrib/lite/interpreter.cc         | 304
-rw-r--r--  tensorflow/contrib/lite/interpreter.h          |  40
-rw-r--r--  tensorflow/contrib/lite/memory_planner.h       |  45
-rw-r--r--  tensorflow/contrib/lite/simple_memory_arena.h  |   4
9 files changed, 1096 insertions(+), 225 deletions(-)
diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD
index 3f1b0be1a7..13350c5a43 100644
--- a/tensorflow/contrib/lite/BUILD
+++ b/tensorflow/contrib/lite/BUILD
@@ -35,6 +35,28 @@ cc_library(
hdrs = ["version.h"],
)
+cc_library(
+ name = "arena_planner",
+ srcs = ["arena_planner.cc"],
+ hdrs = ["arena_planner.h"],
+ deps = [
+ ":context",
+ ":graph_info",
+ ":memory_planner",
+ ":simple_memory_arena",
+ ],
+)
+
+cc_test(
+ name = "arena_planner_test",
+ size = "small",
+ srcs = ["arena_planner_test.cc"],
+ deps = [
+ ":arena_planner",
+ "@com_google_googletest//:gtest",
+ ],
+)
+
# Main library. No ops are included here.
# TODO(aselle): Resolve problems preventing C99 usage.
cc_library(
@@ -44,6 +66,25 @@ cc_library(
)
cc_library(
+ name = "graph_info",
+ hdrs = ["graph_info.h"],
+ deps = [":context"],
+)
+
+cc_library(
+ name = "memory_planner",
+ hdrs = ["memory_planner.h"],
+ deps = [":context"],
+)
+
+cc_library(
+ name = "simple_memory_arena",
+ srcs = ["simple_memory_arena.cc"],
+ hdrs = ["simple_memory_arena.h"],
+ deps = [":context"],
+)
+
+cc_library(
name = "builtin_op_data",
hdrs = [
"builtin_op_data.h",
@@ -70,7 +111,6 @@ cc_library(
"model.cc",
"nnapi_delegate.cc",
"optional_debug_tools.cc",
- "simple_memory_arena.cc",
],
hdrs = [
"allocation.h",
@@ -80,13 +120,16 @@ cc_library(
"model.h",
"nnapi_delegate.h",
"optional_debug_tools.h",
- "simple_memory_arena.h",
],
copts = tflite_copts(),
deps = [
+ ":arena_planner",
":builtin_op_data",
":context",
+ ":graph_info",
+ ":memory_planner",
":schema_fbs_version",
+ ":simple_memory_arena",
"//tensorflow/contrib/lite/kernels:gemm_support",
"//tensorflow/contrib/lite/nnapi:nnapi_lib",
"//tensorflow/contrib/lite/schema:schema_fbs",
@@ -134,7 +177,7 @@ cc_test(
size = "small",
srcs = ["simple_memory_arena_test.cc"],
deps = [
- ":framework",
+ ":simple_memory_arena",
"//tensorflow/contrib/lite/testing:util",
"@com_google_googletest//:gtest",
],
diff --git a/tensorflow/contrib/lite/arena_planner.cc b/tensorflow/contrib/lite/arena_planner.cc
new file mode 100644
index 0000000000..bf1bcdd1a7
--- /dev/null
+++ b/tensorflow/contrib/lite/arena_planner.cc
@@ -0,0 +1,247 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/arena_planner.h"
+
+namespace tflite {
+
+namespace {
+
+// Memory allocation tuning
+constexpr const int kDefaultArenaAlignment = 64;
+constexpr const int kDefaultTensorAlignment = 4;
+
+} // namespace
+
+struct AllocationInfo {
+ // The node index requesting this allocation.
+ int node;
+ // The tensor index to be allocated or deallocated.
+ int tensor;
+  // Whether to allocate or deallocate.
+ enum { ALLOC, DEALLOC } type;
+};
+
+ArenaPlanner::ArenaPlanner(TfLiteContext* context,
+ std::unique_ptr<GraphInfo> graph_info)
+ : context_(context),
+ graph_info_(std::move(graph_info)),
+ arena_(kDefaultArenaAlignment),
+ persistent_arena_(kDefaultArenaAlignment) {}
+
+ArenaPlanner::~ArenaPlanner() {}
+
+int64_t ArenaPlanner::BasePointer(TfLiteAllocationType type) {
+ if (type == kTfLiteArenaRwPersistent) {
+ return persistent_arena_.BasePointer();
+ }
+ if (type == kTfLiteArenaRw) {
+ return arena_.BasePointer();
+ }
+ return 0;
+}
+
+TfLiteStatus ArenaPlanner::ResetAllocations() {
+ TF_LITE_ENSURE_STATUS(arena_.Clear());
+ TF_LITE_ENSURE_STATUS(persistent_arena_.Clear());
+ allocs_.clear();
+ allocs_.resize(graph_info_->num_tensors());
+ return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::PlanAllocations() {
+ // Invalidate any existing data.
+ TF_LITE_ENSURE_STATUS(ResetAllocations());
+
+ // Keeps track of references to each tensor.
+ std::vector<int> refcounts(graph_info_->num_tensors(), 0);
+
+ // There will be an entry in alloc_queue_ for the allocation of each tensor
+  // and another for its deallocation.
+ alloc_queue_.reserve(2 * graph_info_->num_tensors());
+
+ // We must make sure the output tensors are never overwritten. We do that by
+ // artificially adding one to their ref-counts so they are never selected
+ // for deallocation.
+ for (int tensor_index : graph_info_->outputs()) {
+ refcounts[tensor_index]++;
+ }
+
+ // Count references to node input tensors.
+ for (int i = 0; i < graph_info_->num_nodes(); ++i) {
+ const TfLiteNode& node = graph_info_->node(i);
+ TfLiteIntArray* node_inputs = node.inputs;
+ for (int j = 0; j < node_inputs->size; ++j) {
+ int tensor_index = node_inputs->data[j];
+ if (tensor_index != kOptionalTensor) {
+ refcounts[tensor_index]++;
+ }
+ }
+ }
+
+ // Queue all graph inputs for allocation.
+ for (int tensor_index : graph_info_->inputs()) {
+ if (tensor_index != kOptionalTensor) {
+ alloc_queue_.push_back({0, tensor_index, AllocationInfo::ALLOC});
+ }
+ }
+
+ // Go through the graph in execution order.
+ for (int i = 0; i < graph_info_->num_nodes(); ++i) {
+ const TfLiteNode& node = graph_info_->node(i);
+
+ // First queue output tensors for allocation.
+ TfLiteIntArray* node_outputs = node.outputs;
+ for (int j = 0; j < node_outputs->size; ++j) {
+ int tensor_index = node_outputs->data[j];
+ alloc_queue_.push_back({i, tensor_index, AllocationInfo::ALLOC});
+ }
+
+ // Then update the ref-counts of the node's inputs, and if necessary queue
+ // them for deallocation.
+ TfLiteIntArray* node_inputs = node.inputs;
+ for (int j = 0; j < node_inputs->size; ++j) {
+ int tensor_index = node_inputs->data[j];
+ if (tensor_index != kOptionalTensor) {
+ refcounts[tensor_index]--;
+ if (refcounts[tensor_index] == 0) {
+ alloc_queue_.push_back({i, tensor_index, AllocationInfo::DEALLOC});
+ }
+ }
+ }
+ }
+
+ // Note that graph outputs will never be scheduled for deallocation. We
+ // could do that here for completeness, but it won't have any effect.
+ return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::ExecuteAllocations(int first_node, int last_node) {
+ TF_LITE_ENSURE_STATUS(CalculateAllocations(first_node, last_node));
+ TF_LITE_ENSURE_STATUS(Commit());
+
+ for (int i = 0; i < graph_info_->num_tensors(); ++i) {
+ // TODO(ahentz): we could do this only for the tensors that were modified
+ // in CalculateAllocations(), instead of redoing it for tensors that
+ // already had proper pointers. However we must be very careful, because
+ // SimpleMemoryArena::Commit() could move the base pointer.
+ TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i));
+ }
+
+ return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::Commit() {
+ TF_LITE_ENSURE_STATUS(arena_.Commit(context_));
+ TF_LITE_ENSURE_STATUS(persistent_arena_.Commit(context_));
+ return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::CalculateAllocations(int first_node, int last_node) {
+ int active_node = first_node;
+ // When dynamic tensors are present this method is called multiple times.
+ // The items in the alloc_queue_ referring to nodes before first_node were
+ // processed previously and should be skipped. Entries after last_node are
+ // not yet ready to be handled.
+ for (const auto& alloc_info : alloc_queue_) {
+ if (alloc_info.node < first_node) continue;
+ if (alloc_info.node > last_node) break;
+ if (alloc_info.node == active_node) {
+ // This is the first allocation/deallocation for a given node. It is
+ // time to deallocate the previous temporaries and allocate new ones.
+ if (active_node != first_node) {
+ TF_LITE_ENSURE_STATUS(
+ CalculateDeallocationOfInternalTensors(active_node - 1));
+ }
+ TF_LITE_ENSURE_STATUS(CalculateAllocationOfInternalTensors(active_node));
+ ++active_node;
+ }
+ // Handle the current item.
+ if (alloc_info.type == AllocationInfo::ALLOC) {
+ TF_LITE_ENSURE_STATUS(CalculateTensorAllocation(alloc_info.tensor));
+ } else {
+ TF_LITE_ENSURE_STATUS(CalculateTensorDeallocation(alloc_info.tensor));
+ }
+ }
+
+ // Don't forget to deallocate temporaries of last node.
+ TF_LITE_ENSURE_STATUS(
+ CalculateDeallocationOfInternalTensors(active_node - 1));
+
+ return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int tensor_index) {
+ TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
+ if (tensor.allocation_type == kTfLiteArenaRw) {
+ TF_LITE_ENSURE_STATUS(
+ arena_.ResolveAlloc(context_, allocs_[tensor_index], &tensor.data.raw));
+ }
+ if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
+ TF_LITE_ENSURE_STATUS(persistent_arena_.ResolveAlloc(
+ context_, allocs_[tensor_index], &tensor.data.raw));
+ }
+ return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::CalculateTensorAllocation(int tensor_index) {
+ TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
+ if (tensor.allocation_type == kTfLiteArenaRw) {
+ TF_LITE_ENSURE_STATUS(arena_.Allocate(context_, kDefaultTensorAlignment,
+ tensor.bytes,
+ &allocs_[tensor_index]));
+ }
+ if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
+ TF_LITE_ENSURE_STATUS(
+ persistent_arena_.Allocate(context_, kDefaultTensorAlignment,
+ tensor.bytes, &allocs_[tensor_index]));
+ }
+ return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::CalculateTensorDeallocation(int tensor_index) {
+ TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
+ if (tensor.allocation_type == kTfLiteArenaRw) {
+ TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[tensor_index]));
+ }
+ return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::CalculateAllocationOfInternalTensors(
+ int node_index) {
+ if (node_index < graph_info_->num_nodes()) {
+ const TfLiteNode& node = graph_info_->node(node_index);
+ TfLiteIntArray* node_temporaries = node.temporaries;
+ for (int i = 0; i < node_temporaries->size; ++i) {
+ int tensor_index = node_temporaries->data[i];
+ TF_LITE_ENSURE_STATUS(CalculateTensorAllocation(tensor_index));
+ }
+ }
+ return kTfLiteOk;
+}
+
+TfLiteStatus ArenaPlanner::CalculateDeallocationOfInternalTensors(
+ int node_index) {
+ if (node_index < graph_info_->num_nodes()) {
+ const TfLiteNode& node = graph_info_->node(node_index);
+ TfLiteIntArray* node_temporaries = node.temporaries;
+ for (int i = 0; i < node_temporaries->size; ++i) {
+ int tensor_index = node_temporaries->data[i];
+ TF_LITE_ENSURE_STATUS(CalculateTensorDeallocation(tensor_index));
+ }
+ }
+ return kTfLiteOk;
+}
+
+} // namespace tflite
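
For illustration, here is a minimal standalone sketch (not part of this change; the two-node graph and all names in it are hypothetical) of the ref-counting scheme PlanAllocations() uses to build the alloc/dealloc queue: graph outputs get an extra reference so they are never freed, each node's outputs are queued for allocation at that node, and an input is queued for deallocation at the last node that consumes it.

#include <cstdio>
#include <utility>
#include <vector>

struct Step { int node; int tensor; bool alloc; };

int main() {
  // Tensor 0 -> node 0 -> tensor 1 -> node 1 -> tensor 2 (graph output).
  std::vector<std::pair<std::vector<int>, std::vector<int>>> nodes = {
      {{0}, {1}}, {{1}, {2}}};  // {inputs, outputs} per node
  std::vector<int> graph_inputs = {0};
  std::vector<int> graph_outputs = {2};

  std::vector<int> refcounts(3, 0);
  for (int t : graph_outputs) refcounts[t]++;  // outputs are never deallocated
  for (const auto& n : nodes)
    for (int t : n.first) refcounts[t]++;      // count each input reference

  std::vector<Step> queue;
  for (int t : graph_inputs) queue.push_back({0, t, true});
  for (int i = 0; i < static_cast<int>(nodes.size()); ++i) {
    for (int t : nodes[i].second) queue.push_back({i, t, true});  // alloc outputs
    for (int t : nodes[i].first)
      if (--refcounts[t] == 0) queue.push_back({i, t, false});    // free dead inputs
  }

  // Prints: +0 +1 -0 +2 -1, annotated with the node that triggers each step.
  for (const Step& s : queue)
    std::printf("node %d: %c%d\n", s.node, s.alloc ? '+' : '-', s.tensor);
  return 0;
}
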
diff --git a/tensorflow/contrib/lite/arena_planner.h b/tensorflow/contrib/lite/arena_planner.h
new file mode 100644
index 0000000000..bd87414ec3
--- /dev/null
+++ b/tensorflow/contrib/lite/arena_planner.h
@@ -0,0 +1,107 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_ARENA_PLANNER_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_ARENA_PLANNER_H_
+
+#include <memory>
+#include <vector>
+
+#include "tensorflow/contrib/lite/context.h"
+#include "tensorflow/contrib/lite/graph_info.h"
+#include "tensorflow/contrib/lite/memory_planner.h"
+#include "tensorflow/contrib/lite/simple_memory_arena.h"
+
+namespace tflite {
+
+class AllocationInfo;
+
+// A memory planner that makes all the allocations using arenas.
+//
+// Before a model is executed by the interpreter, this class determines when
+// each tensor needs to be allocated and deallocated, and preallocates all the
+// necessary memory (the PlanAllocations phase). It then assigns portions of
+// this memory buffer to each tensor (the ExecuteAllocations phase). Tensors may
+// share some of the buffer if a tensor B is to be allocated after another tensor
+// A has been deallocated.
+//
+// If dynamic tensors are used the planning steps can be repeated during model
+// execution. Since dynamic tensors don't have sizes until after the
+// corresponding operation is executed, this class supports incremental
+// planning.
+class ArenaPlanner : public MemoryPlanner {
+ public:
+  // Ownership of 'context' is not taken and it must remain valid until the
+ // ArenaPlanner is destroyed.
+ ArenaPlanner(TfLiteContext* context, std::unique_ptr<GraphInfo> graph_info);
+ ~ArenaPlanner() override;
+ ArenaPlanner(const ArenaPlanner&) = delete;
+ ArenaPlanner& operator=(const ArenaPlanner&) = delete;
+
+ TfLiteStatus ResetAllocations() override;
+ TfLiteStatus PlanAllocations() override;
+ TfLiteStatus ExecuteAllocations(int first_node, int last_node) override;
+
+ // Returns the base arena location for a given allocation type.
+ int64_t BasePointer(TfLiteAllocationType type);
+
+ private:
+ // Make sure all the arenas have reserved enough memory to store all their
+ // tensors.
+ TfLiteStatus Commit();
+
+ // Traverse the allocation queue and reserve space in the appropriate arena
+ // for all tensors affected by ops in the interval [first_node, last_node].
+ TfLiteStatus CalculateAllocations(int first_node, int last_node);
+
+ // Assign absolute memory location to a tensor, based on its relative
+ // position inside the corresponding arena buffer.
+ TfLiteStatus ResolveTensorAllocation(int tensor_index);
+
+ // Register an allocation for the given tensor.
+ TfLiteStatus CalculateTensorAllocation(int tensor_index);
+
+ // Register a deallocation for the given tensor.
+ TfLiteStatus CalculateTensorDeallocation(int tensor_index);
+
+ // Register an allocation for all internal (temporary) tensors of
+ // 'node_index'.
+ TfLiteStatus CalculateAllocationOfInternalTensors(int node_index);
+
+ // Register a deallocation for all internal (temporary) tensors of
+ // 'node_index'.
+ TfLiteStatus CalculateDeallocationOfInternalTensors(int node_index);
+
+ TfLiteContext* context_;
+ std::unique_ptr<GraphInfo> graph_info_;
+
+ // Stores allocation data for all tensors.
+ std::vector<ArenaAlloc> allocs_;
+
+  // A chronological list of instructions to allocate and deallocate tensors,
+ // reflecting the way they are used in the graph.
+ std::vector<AllocationInfo> alloc_queue_;
+
+  // Raw memory buffer that is allocated for all temporary tensors and graph
+  // outputs that are declared kTfLiteArenaRw.
+ SimpleMemoryArena arena_;
+
+ // Raw memory buffer that is allocated for persistent tensors that are
+ // declared as kTfLiteArenaRwPersistent.
+ SimpleMemoryArena persistent_arena_;
+};
+
+} // namespace tflite
+
+#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_ARENA_PLANNER_H_
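
As a usage note, the planner added above is driven in two phases, mirroring how interpreter.cc uses it later in this patch. A minimal sketch, assuming a valid TfLiteContext and a concrete GraphInfo implementation already exist, with error handling reduced to the TF_LITE_ENSURE_STATUS macro, and with the helper name being hypothetical:

#include <memory>
#include <utility>

#include "tensorflow/contrib/lite/arena_planner.h"

// Hypothetical helper: plan once from the graph structure, then materialize
// arena space for every node in [0, last_prepared_node].
TfLiteStatus PlanAndAllocate(TfLiteContext* context,
                             std::unique_ptr<tflite::GraphInfo> info,
                             int last_prepared_node) {
  tflite::ArenaPlanner planner(context, std::move(info));
  // Phase 1: derive the allocation/deallocation order (sizes not needed yet).
  TF_LITE_ENSURE_STATUS(planner.PlanAllocations());
  // Phase 2: once tensor sizes are known up to 'last_prepared_node', reserve
  // arena space and point each tensor's data.raw into the arenas.
  TF_LITE_ENSURE_STATUS(planner.ExecuteAllocations(0, last_prepared_node));
  return kTfLiteOk;
}
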
diff --git a/tensorflow/contrib/lite/arena_planner_test.cc b/tensorflow/contrib/lite/arena_planner_test.cc
new file mode 100644
index 0000000000..c27c327abc
--- /dev/null
+++ b/tensorflow/contrib/lite/arena_planner_test.cc
@@ -0,0 +1,472 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include "tensorflow/contrib/lite/arena_planner.h"
+
+#include <cstdarg>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+
+namespace tflite {
+namespace {
+
+// A simple op to be used in tests, as syntactic sugar.
+class TestOp {
+ public:
+ TestOp(std::initializer_list<int> inputs, std::initializer_list<int> outputs,
+ std::initializer_list<int> temporaries)
+ : inputs_(inputs), outputs_(outputs), temporaries_(temporaries) {}
+
+ const std::vector<int>& inputs() const { return inputs_; }
+ const std::vector<int>& outputs() const { return outputs_; }
+ const std::vector<int>& temporaries() const { return temporaries_; }
+
+ private:
+ std::vector<int> inputs_;
+ std::vector<int> outputs_;
+ std::vector<int> temporaries_;
+};
+
+// A test graph where inputs are processed by the given nodes to produce
+// outputs.
+class TestGraph {
+ public:
+ TestGraph(std::initializer_list<int> inputs,
+ std::initializer_list<TestOp> nodes,
+ std::initializer_list<int> outputs)
+ : inputs_(inputs), outputs_(outputs) {
+ int max_tensor_index = 0;
+
+ for (int t : inputs) {
+ max_tensor_index = std::max(max_tensor_index, t);
+ }
+ for (int t : outputs) {
+ max_tensor_index = std::max(max_tensor_index, t);
+ }
+ for (const auto& node : nodes) {
+ auto int_array = [](const std::vector<int>& x) {
+ TfLiteIntArray* lite = TfLiteIntArrayCreate(x.size());
+ for (size_t i = 0; i < x.size(); i++) lite->data[i] = x[i];
+ return lite;
+ };
+
+ nodes_.push_back(TfLiteNode());
+ nodes_.back().inputs = int_array(node.inputs());
+ for (int t : node.inputs()) {
+ max_tensor_index = std::max(max_tensor_index, t);
+ }
+ nodes_.back().outputs = int_array(node.outputs());
+ for (int t : node.outputs()) {
+ max_tensor_index = std::max(max_tensor_index, t);
+ }
+ nodes_.back().temporaries = int_array(node.temporaries());
+ for (int t : node.temporaries()) {
+ max_tensor_index = std::max(max_tensor_index, t);
+ }
+ }
+
+ for (int i = 0; i <= max_tensor_index; ++i) {
+ tensors_.push_back(TfLiteTensor());
+ // Set some default values for allocation_type and bytes, which are the
+ // only fields used by the arena planner.
+ tensors_.back().allocation_type = kTfLiteArenaRw;
+ tensors_.back().bytes = (i + 1) * 3;
+ }
+ }
+
+ ~TestGraph() {
+ for (auto node : nodes_) {
+ TfLiteIntArrayFree(node.inputs);
+ TfLiteIntArrayFree(node.outputs);
+ TfLiteIntArrayFree(node.temporaries);
+ }
+ }
+
+ const std::vector<TfLiteNode>& nodes() { return nodes_; }
+ std::vector<TfLiteTensor>* tensors() { return &tensors_; }
+ const std::vector<int>& inputs() { return inputs_; }
+ const std::vector<int>& outputs() { return outputs_; }
+
+ private:
+ std::vector<TfLiteNode> nodes_;
+ std::vector<TfLiteTensor> tensors_;
+ std::vector<int> inputs_;
+ std::vector<int> outputs_;
+};
+
+// The GraphInfo for a TestGraph.
+class TestGraphInfo : public GraphInfo {
+ public:
+ explicit TestGraphInfo(TestGraph* graph) : graph_(graph) {}
+
+ size_t num_tensors() const override { return graph_->tensors()->size(); }
+ TfLiteTensor* tensor(size_t index) override {
+ return &graph_->tensors()->at(index);
+ }
+ size_t num_nodes() const override { return graph_->nodes().size(); }
+ const TfLiteNode& node(size_t index) const override {
+ return graph_->nodes()[index];
+ }
+ const std::vector<int>& inputs() const override { return graph_->inputs(); }
+ const std::vector<int>& outputs() const override { return graph_->outputs(); }
+
+ private:
+ TestGraph* graph_;
+};
+
+void ReportError(TfLiteContext* context, const char* format, ...) {
+ const size_t kBufferSize = 1024;
+ char temp_buffer[kBufferSize];
+
+ va_list args;
+ va_start(args, format);
+ vsnprintf(temp_buffer, kBufferSize, format, args);
+ va_end(args);
+
+ LOG(INFO) << temp_buffer;
+}
+
+class ArenaPlannerTest : public ::testing::Test {
+ protected:
+ void SetGraph(TestGraph* graph) {
+ graph_ = graph;
+ context_.ReportError = ReportError;
+ planner_.reset(new ArenaPlanner(
+ &context_, std::unique_ptr<GraphInfo>(new TestGraphInfo(graph))));
+ CHECK(planner_->ResetAllocations() == kTfLiteOk);
+ CHECK(planner_->PlanAllocations() == kTfLiteOk);
+ }
+
+ void Execute(int start, int end) {
+ CHECK(planner_->ExecuteAllocations(start, end) == kTfLiteOk);
+ }
+
+ // Returns the actual offset of a given tensor, relative to the start of its
+ // arena.
+ int64_t GetOffset(int tensor_index) {
+ const TfLiteTensor& tensor = (*graph_->tensors())[tensor_index];
+ return reinterpret_cast<int64_t>(tensor.data.raw) -
+ planner_->BasePointer(tensor.allocation_type);
+ }
+
+ // Returns the first aligned offset after a given tensor.
+ int64_t GetOffsetAfter(int tensor_index) {
+ const TfLiteTensor& tensor = (*graph_->tensors())[tensor_index];
+ int64_t offset = GetOffset(tensor_index) + tensor.bytes;
+    // We must make sure the offset is aligned to kDefaultTensorAlignment (4).
+ if (offset % 4 != 0) {
+ offset += 4 - offset % 4;
+ }
+ return offset;
+ };
+
+ TfLiteContext context_;
+ TestGraph* graph_;
+ std::unique_ptr<ArenaPlanner> planner_;
+};
+
+TEST_F(ArenaPlannerTest, EmptyGraph) {
+ TestGraph graph({}, {}, {});
+ SetGraph(&graph);
+ Execute(0, 10);
+}
+
+TEST_F(ArenaPlannerTest, GraphWithNoOps) {
+ TestGraph graph({0, 10}, {}, {5, 11});
+ SetGraph(&graph);
+ Execute(0, 10);
+ EXPECT_EQ(GetOffset(0), 0);
+ EXPECT_EQ(GetOffset(10), GetOffsetAfter(0));
+ // The outputs are never allocated because they are not connected to any
+ // inputs.
+ EXPECT_EQ(GetOffset(5), 0);
+ EXPECT_EQ(GetOffset(11), 0);
+}
+
+TEST_F(ArenaPlannerTest, GraphWithOneOp) {
+ TestGraph graph({1}, {{{1}, {2}, {}}}, {2});
+ SetGraph(&graph);
+ Execute(0, 10);
+ EXPECT_EQ(GetOffset(1), 0);
+ EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+}
+
+TEST_F(ArenaPlannerTest, ZeroSizedTensors) {
+ TestGraph graph({1}, {{{1}, {2}, {}}}, {2});
+ (*graph.tensors())[1].bytes = 0;
+ SetGraph(&graph);
+ // TODO(ahentz): this is currently broken because the arena finds two
+ // allocations with the same offset and returns an error.
+ ASSERT_FALSE(planner_->ExecuteAllocations(0, 10) == kTfLiteOk);
+ // EXPECT_EQ(GetOffset(1), 0);
+ // EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+}
+
+TEST_F(ArenaPlannerTest, SimpleGraph) {
+ TestGraph graph({0, 1},
+ {
+ /* in, out, tmp */
+ {{0, 1}, {2}, {}}, // First op
+ {{2, 0}, {4, 5}, {}}, // Second op
+ {{4, 5}, {3}, {}} // Third op
+ },
+ {3});
+ SetGraph(&graph);
+ Execute(0, 10);
+
+ // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +4 +5 -2 -0 +3 -4 -5
+ EXPECT_EQ(GetOffset(0), 0);
+ EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+ EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+ EXPECT_EQ(GetOffset(4), GetOffsetAfter(2));
+ EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+ EXPECT_EQ(GetOffset(3), 0);
+}
+
+TEST_F(ArenaPlannerTest, SimpleGraphWithTemporary) {
+ TestGraph graph({0, 1},
+ {
+ /* in, out, tmp */
+ {{0, 1}, {2}, {}}, // First op
+ {{2, 0}, {4}, {5}}, // Second op, with temporary
+ {{4}, {3}, {}} // Third op
+ },
+ {3});
+ SetGraph(&graph);
+ Execute(0, 10);
+
+ // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
+ EXPECT_EQ(GetOffset(0), 0);
+ EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+ EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+ EXPECT_EQ(GetOffset(5), GetOffsetAfter(2));
+ EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+ EXPECT_EQ(GetOffset(3), 0);
+}
+
+TEST_F(ArenaPlannerTest, SimpleGraphWithOptionals) {
+ TestGraph graph({0, -1, 1},
+ {
+ /* in, out, tmp */
+ {{0, 1}, {2}, {}}, // First op
+ {{2, 0}, {4, 5}, {}}, // Second op
+ {{4, -1, 5}, {3}, {}} // Third op, with optional
+ },
+ {3});
+ SetGraph(&graph);
+ Execute(0, 10);
+
+ // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +4 +5 -2 -0 +3 -4 -5
+ EXPECT_EQ(GetOffset(0), 0);
+ EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+ EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+ EXPECT_EQ(GetOffset(4), GetOffsetAfter(2));
+ EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+ EXPECT_EQ(GetOffset(3), 0);
+}
+
+TEST_F(ArenaPlannerTest, SimpleGraphWithLargeTensor) {
+ TestGraph graph({0, -1, 1},
+ {
+ /* in, out, tmp */
+ {{0, 1}, {2}, {}}, // First op
+ {{2, 0}, {4}, {5}}, // Second op, with temporary
+ {{4, -1}, {3}, {}} // Third op, with optional
+ },
+ {3});
+
+ // Make #1 very large so its vacancy can be filled with #5 and #4.
+ (*graph.tensors())[1].bytes = 40;
+
+ SetGraph(&graph);
+ Execute(0, 10);
+
+ // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
+ EXPECT_EQ(GetOffset(0), 0);
+ EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+ EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+ EXPECT_EQ(GetOffset(5), GetOffsetAfter(0));
+ EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+ EXPECT_EQ(GetOffset(3), 0);
+}
+
+TEST_F(ArenaPlannerTest, SimpleGraphWithPersistentTensor) {
+ TestGraph graph({0, -1, 1},
+ {
+ /* in, out, tmp */
+ {{0, 1}, {2}, {}}, // First op
+ {{2, 0}, {4}, {5}}, // Second op, with temporary
+ {{4, -1}, {3}, {}} // Third op, with optional
+ },
+ {3});
+
+ // Make #1 persistent so it goes into its own arena.
+ (*graph.tensors())[1].allocation_type = kTfLiteArenaRwPersistent;
+
+ SetGraph(&graph);
+ Execute(0, 10);
+
+ // Make sure #0 and #1 were given different memory locations (because they
+ // will both have offset=0, in different arenas.)
+ EXPECT_NE((*graph.tensors())[0].data.raw, (*graph.tensors())[1].data.raw);
+
+ // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
+ EXPECT_EQ(GetOffset(0), 0);
+ EXPECT_EQ(GetOffset(1), 0);
+ EXPECT_EQ(GetOffset(2), GetOffsetAfter(0));
+ EXPECT_EQ(GetOffset(5), GetOffsetAfter(2));
+ EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+ EXPECT_EQ(GetOffset(3), 0);
+}
+
+TEST_F(ArenaPlannerTest, SimpleGraphWithDynamicTensor) {
+ TestGraph graph({0, -1, 1},
+ {
+ /* in, out, tmp */
+ {{0, 1}, {2}, {}}, // First op
+ {{2, 0}, {4}, {5}}, // Second op, with temporary
+ {{4, -1}, {3}, {}} // Third op, with optional
+ },
+ {3});
+
+  // Make #1 dynamic so it does not get allocated.
+ (*graph.tensors())[1].allocation_type = kTfLiteDynamic;
+
+ SetGraph(&graph);
+ Execute(0, 10);
+
+ EXPECT_EQ((*graph.tensors())[1].data.raw, nullptr);
+
+ // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4
+ EXPECT_EQ(GetOffset(0), 0);
+ EXPECT_EQ(GetOffset(2), GetOffsetAfter(0));
+ EXPECT_EQ(GetOffset(5), GetOffsetAfter(2));
+ EXPECT_EQ(GetOffset(4), GetOffsetAfter(5));
+ EXPECT_EQ(GetOffset(3), 0);
+}
+
+TEST_F(ArenaPlannerTest, LargerGraphAndStepwiseAllocation) {
+ TestGraph graph({0, 1},
+ {
+ /* in, out, tmp */
+ {{0, 1}, {2, 3}, {}},
+ {{2, 0}, {4, 5}, {6}},
+ {{1, -1}, {7}, {}},
+ {{7, 3}, {8}, {9}},
+ {{4, 5, 8}, {10}, {}},
+ },
+ {10});
+ SetGraph(&graph);
+
+ auto is_unallocated = [&](int tensor_index) {
+    // TODO(ahentz): We'd like to use nullptr to represent unallocated tensors,
+    // but the current code still points them all to the beginning of the
+    // allocation (that is, zero offset).
+ // return (*graph.tensors())[tensor_index].data.raw == nullptr;
+ return GetOffset(tensor_index) == 0;
+ };
+
+ // The allocation plan is made at the beginning and is independent of
+ // the execution steps. Here's the allocation order:
+ // Op0: +0 +1 +2 +3
+ // Op1: +6 +4 +5 -6 -0 -2
+ // Op2: +7 -1
+ // Op3: +9 +8 -9 -3 -7
+ // Op4: +10 -4 -5 -8
+
+ Execute(0, 0);
+ EXPECT_EQ(GetOffset(0), 0);
+ EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+ EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+ EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
+ EXPECT_TRUE(is_unallocated(6));
+ EXPECT_TRUE(is_unallocated(4));
+ EXPECT_TRUE(is_unallocated(5));
+ EXPECT_TRUE(is_unallocated(7));
+ EXPECT_TRUE(is_unallocated(9));
+ EXPECT_TRUE(is_unallocated(8));
+ EXPECT_TRUE(is_unallocated(10));
+
+ Execute(1, 1);
+ EXPECT_EQ(GetOffset(0), 0);
+ EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+ EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+ EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
+ EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
+ EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
+ EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+ EXPECT_TRUE(is_unallocated(7));
+ EXPECT_TRUE(is_unallocated(9));
+ EXPECT_TRUE(is_unallocated(8));
+ EXPECT_TRUE(is_unallocated(10));
+
+ Execute(2, 2);
+ EXPECT_EQ(GetOffset(0), 0);
+ EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+ EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+ EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
+ EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
+ EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
+ EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+ // Here's an interesting allocation. Even though #6 requires only 21 bytes,
+ // its deallocation freed up 24 bytes due to the alignment requirements in
+ // the arena. That means we can fit #7 in the same space!
+ EXPECT_EQ(GetOffset(7), GetOffsetAfter(3));
+ EXPECT_TRUE(is_unallocated(9));
+ EXPECT_TRUE(is_unallocated(8));
+ EXPECT_TRUE(is_unallocated(10));
+
+ Execute(3, 3);
+ EXPECT_EQ(GetOffset(0), 0);
+ EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+ EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+ EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
+ EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
+ EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
+ EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+ EXPECT_EQ(GetOffset(7), GetOffsetAfter(3));
+ // The deallocation of #0, #1 and #2 freed up 24 bytes but that's not enough
+ // for #9, so it goes at the end.
+ EXPECT_EQ(GetOffset(9), GetOffsetAfter(5));
+ EXPECT_EQ(GetOffset(8), GetOffsetAfter(9));
+ EXPECT_TRUE(is_unallocated(10));
+
+ Execute(4, 4);
+ EXPECT_EQ(GetOffset(0), 0);
+ EXPECT_EQ(GetOffset(1), GetOffsetAfter(0));
+ EXPECT_EQ(GetOffset(2), GetOffsetAfter(1));
+ EXPECT_EQ(GetOffset(3), GetOffsetAfter(2));
+ EXPECT_EQ(GetOffset(6), GetOffsetAfter(3));
+ EXPECT_EQ(GetOffset(4), GetOffsetAfter(6));
+ EXPECT_EQ(GetOffset(5), GetOffsetAfter(4));
+ EXPECT_EQ(GetOffset(7), GetOffsetAfter(3));
+ EXPECT_EQ(GetOffset(9), GetOffsetAfter(5));
+ EXPECT_EQ(GetOffset(8), GetOffsetAfter(9));
+ // There's just enough space at the beginning for #10 due to the
+ // deallocation of #0, #1, #2 and #3 (total 36 bytes, #10 needs
+ // only 33.)
+ EXPECT_EQ(GetOffset(10), 0);
+}
+
+} // namespace
+} // namespace tflite
+
+int main(int argc, char** argv) {
+ // ::tflite::LogToStderr();
+ FLAGS_logtostderr = true;
+
+ ::testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}
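
The byte counts quoted in the comments of LargerGraphAndStepwiseAllocation follow from the test's sizing rule (tensor #i occupies (i+1)*3 bytes) plus the 4-byte alignment applied by GetOffsetAfter(). A standalone sketch of that arithmetic, not part of the test itself:

#include <cstdio>

// Tensor #i occupies (i + 1) * 3 bytes; offsets are rounded up to 4 bytes.
int AlignedSize(int tensor_index) {
  int bytes = (tensor_index + 1) * 3;
  return (bytes + 3) & ~3;
}

int main() {
  std::printf("#6: %d bytes, %d after alignment\n", 7 * 3, AlignedSize(6));  // 21 -> 24
  std::printf("#7 needs %d bytes, so it fits in #6's slot\n", 8 * 3);        // 24
  std::printf("#0..#3 free %d bytes; #10 needs %d\n",
              AlignedSize(0) + AlignedSize(1) + AlignedSize(2) + AlignedSize(3),
              11 * 3);  // 36 vs 33
  return 0;
}
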
diff --git a/tensorflow/contrib/lite/graph_info.h b/tensorflow/contrib/lite/graph_info.h
new file mode 100644
index 0000000000..5481aede60
--- /dev/null
+++ b/tensorflow/contrib/lite/graph_info.h
@@ -0,0 +1,53 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_GRAPH_INFO_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_GRAPH_INFO_H_
+
+#include <vector>
+
+#include "tensorflow/contrib/lite/context.h"
+
+namespace tflite {
+
+// Basic information about an inference graph, where execution nodes
+// are connected via tensors.
+class GraphInfo {
+ public:
+ virtual ~GraphInfo() {}
+
+ // Total number of tensors in the graph.
+ virtual size_t num_tensors() const = 0;
+
+ // Returns a tensor given its index which is expected to be between 0 and
+ // num_tensors().
+ virtual TfLiteTensor* tensor(size_t index) = 0;
+
+ // Total number of nodes in the graph.
+ virtual size_t num_nodes() const = 0;
+
+ // Returns a node given its index which is expected to be between 0 and
+ // num_nodes().
+ virtual const TfLiteNode& node(size_t index) const = 0;
+
+ // Returns the indices of the input tensors.
+ virtual const std::vector<int>& inputs() const = 0;
+
+ // Returns the indices of the output tensors.
+ virtual const std::vector<int>& outputs() const = 0;
+};
+
+} // namespace tflite
+
+#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_GRAPH_INFO_H_
diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc
index 954e236ac8..5f5981e45a 100644
--- a/tensorflow/contrib/lite/interpreter.cc
+++ b/tensorflow/contrib/lite/interpreter.cc
@@ -18,16 +18,16 @@ limitations under the License.
#include <cstdarg>
#include <cstdint>
#include <cstring>
+#include "tensorflow/contrib/lite/arena_planner.h"
#include "tensorflow/contrib/lite/context.h"
#include "tensorflow/contrib/lite/error_reporter.h"
+#include "tensorflow/contrib/lite/graph_info.h"
#include "tensorflow/contrib/lite/kernels/gemm_support.h"
+#include "tensorflow/contrib/lite/memory_planner.h"
#include "tensorflow/contrib/lite/nnapi_delegate.h"
namespace {
-// Memory allocation tuning
-constexpr const int kDefaultArenaAlignment = 64;
-constexpr const int kDefaultTensorAlignment = 4;
// std::vector preallocation tuning.
constexpr const int kSlotsToReserve = 128;
@@ -35,10 +35,33 @@ constexpr const int kSlotsToReserve = 128;
namespace tflite {
+// A trivial implementation of GraphInfo around the Interpreter.
+class InterpreterInfo : public GraphInfo {
+ public:
+ explicit InterpreterInfo(Interpreter* interpreter)
+ : interpreter_(interpreter) {}
+
+ size_t num_tensors() const override { return interpreter_->tensors_size(); }
+ TfLiteTensor* tensor(size_t index) override {
+ return interpreter_->tensor(index);
+ }
+ size_t num_nodes() const override { return interpreter_->nodes_size(); }
+ const TfLiteNode& node(size_t index) const override {
+ return interpreter_->node_and_registration(index)->first;
+ }
+ const std::vector<int>& inputs() const override {
+ return interpreter_->inputs();
+ }
+ const std::vector<int>& outputs() const override {
+ return interpreter_->outputs();
+ }
+
+ public:
+ Interpreter* interpreter_;
+};
+
Interpreter::Interpreter(ErrorReporter* error_reporter)
- : arena_(kDefaultArenaAlignment),
- persistent_arena_(kDefaultArenaAlignment),
- error_reporter_(error_reporter ? error_reporter
+ : error_reporter_(error_reporter ? error_reporter
: DefaultErrorReporter()) {
context_.impl_ = static_cast<void*>(this);
context_.ResizeTensor = ResizeTensor;
@@ -50,7 +73,7 @@ Interpreter::Interpreter(ErrorReporter* error_reporter)
// Reserve some space for the tensors to avoid excessive resizing.
tensors_.reserve(kSlotsToReserve);
nodes_and_registration_.reserve(kSlotsToReserve);
- next_allocate_node_id_ = 0;
+ next_node_to_prepare_ = 0;
UseNNAPI(false);
}
@@ -128,181 +151,6 @@ TfLiteStatus Interpreter::BytesRequired(TfLiteType type, const int* dims,
return kTfLiteOk;
}
-TfLiteStatus Interpreter::AllocateTensorsWhoseSizesAreKnown() {
- if (!consistent_) {
- ReportError(&context_, "AllocateTensors() called on inconsistent model.");
- return kTfLiteError;
- }
- if (next_allocate_node_id_ == nodes_and_registration_.size() && invokable_) {
- return kTfLiteOk;
- }
- allocs_and_refcounts_.resize(context_.tensors_size);
-
- int new_next_allocate_node_id = next_allocate_node_id_;
- invokable_ = false;
-
- // Allocate graph input nodes.
- if (next_allocate_node_id_ == 0) {
- for (int i = 0; i < inputs_.size(); ++i) {
- int tensor_index = inputs_[i];
- if (tensor_index == kOptionalTensor) {
- continue;
- }
- TfLiteTensor& tensor = context_.tensors[tensor_index];
- if (tensor.allocation_type == kTfLiteArenaRw) {
- TF_LITE_ENSURE_OK(
- &context_,
- arena_.Allocate(&context_, kDefaultTensorAlignment, tensor.bytes,
- &allocs_and_refcounts_[tensor_index].alloc));
- }
- }
- // Add 1 to output tensors, so they will not get overwritten.
- for (int i = 0; i < outputs_.size(); ++i) {
- allocs_and_refcounts_[outputs_[i]].count++;
- }
- }
-
- // Count references to node input tensors, and resize node-referenced tensors
- // until we encounter a node that has a dynamic output tensor.
- for (int k = next_allocate_node_id_; k < nodes_and_registration_.size();
- k++) {
- new_next_allocate_node_id++;
- TfLiteNode& node = nodes_and_registration_[k].first;
- const TfLiteRegistration& registration = nodes_and_registration_[k].second;
- if (OpPrepare(registration, &node) == kTfLiteError) {
- return kTfLiteError;
- }
-
- TfLiteIntArray* node_inputs = node.inputs;
- for (int i = 0; i < node_inputs->size; ++i) {
- int tensor_index = node_inputs->data[i];
- if (tensor_index != kOptionalTensor) {
- allocs_and_refcounts_[node_inputs->data[i]].count++;
- }
- }
-
- // Discontinue if the node has dynamic outputs.
- bool has_unallocated_dynamic_tensor = false;
- TfLiteIntArray* node_outputs = node.outputs;
- for (int i = 0; i < node_outputs->size; ++i) {
- TfLiteTensor& tensor = context_.tensors[node_outputs->data[i]];
- if (tensor.allocation_type == kTfLiteDynamic) {
- has_unallocated_dynamic_tensor = true;
- break;
- }
- }
- if (has_unallocated_dynamic_tensor) {
- break;
- }
- }
-
- // Allocate graph persistent outputs, e.g. RNN cell states, etc.
- for (int k = next_allocate_node_id_; k < new_next_allocate_node_id; k++) {
- TfLiteNode& node = nodes_and_registration_[k].first;
-
- // Go through output tensors and allocate the persistent ones first.
- TfLiteIntArray* node_outputs = node.outputs;
- for (int i = 0; i < node_outputs->size; ++i) {
- int tensor_index = node_outputs->data[i];
- TfLiteTensor& tensor = context_.tensors[tensor_index];
- if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
- TF_LITE_ENSURE_OK(&context_,
- persistent_arena_.Allocate(
- &context_, kDefaultTensorAlignment, tensor.bytes,
- &allocs_and_refcounts_[tensor_index].alloc));
- }
- }
- }
-
- // Go through the graph in execution order.
- for (int k = next_allocate_node_id_; k < new_next_allocate_node_id; k++) {
- TfLiteNode& node = nodes_and_registration_[k].first;
-
- // First allocate output tensors.
- TfLiteIntArray* node_outputs = node.outputs;
- for (int i = 0; i < node_outputs->size; ++i) {
- int tensor_index = node_outputs->data[i];
- TfLiteTensor& tensor = context_.tensors[tensor_index];
- if (tensor.allocation_type == kTfLiteArenaRw) {
- TF_LITE_ENSURE_OK(
- &context_,
- arena_.Allocate(&context_, kDefaultTensorAlignment, tensor.bytes,
- &allocs_and_refcounts_[tensor_index].alloc));
- }
- }
- // Then the temporaries, in two passes. First allocate them all, them
- // deallocate them.
- TfLiteIntArray* node_temporaries = node.temporaries;
- for (int i = 0; i < node_temporaries->size; ++i) {
- int tensor_index = node_temporaries->data[i];
- TfLiteTensor& tensor = context_.tensors[tensor_index];
- if (tensor.allocation_type == kTfLiteArenaRw) {
- TF_LITE_ENSURE_OK(
- &context_,
- arena_.Allocate(&context_, kDefaultTensorAlignment, tensor.bytes,
- &allocs_and_refcounts_[tensor_index].alloc));
- }
- }
- for (int i = 0; i < node_temporaries->size; ++i) {
- int tensor_index = node_temporaries->data[i];
- TfLiteTensor& tensor = context_.tensors[tensor_index];
- allocs_and_refcounts_[tensor_index].count--;
- if (tensor.allocation_type == kTfLiteArenaRw &&
- allocs_and_refcounts_[tensor_index].count == 0) {
- TF_LITE_ENSURE_OK(
- &context_,
- arena_.Deallocate(&context_,
- allocs_and_refcounts_[tensor_index].alloc));
- }
- }
-
- // Then process the node's inputs.
- TfLiteIntArray* node_inputs = node.inputs;
- for (int i = 0; i < node_inputs->size; ++i) {
- int tensor_index = node_inputs->data[i];
- if (tensor_index == kOptionalTensor) {
- continue;
- }
- TfLiteTensor& tensor = context_.tensors[tensor_index];
-
- // Decrease reference count and deallocate if not needed anymore.
- allocs_and_refcounts_[tensor_index].count--;
- if (tensor.allocation_type == kTfLiteArenaRw &&
- allocs_and_refcounts_[tensor_index].count == 0) {
- TF_LITE_ENSURE_OK(
- &context_,
- arena_.Deallocate(&context_,
- allocs_and_refcounts_[tensor_index].alloc));
- }
- }
- }
-
- // Resize the buffer and commit the arena.
- TF_LITE_ENSURE_OK(&context_, arena_.Commit(&context_));
- TF_LITE_ENSURE_OK(&context_, persistent_arena_.Commit(&context_));
-
- // Rewire the tensors to use the underlying arena buffer.
- for (int i = 0; i < context_.tensors_size; ++i) {
- TfLiteTensor& tensor = context_.tensors[i];
- if (tensor.allocation_type == kTfLiteArenaRw) {
- TF_LITE_ENSURE_OK(
- &context_,
- arena_.ResolveAlloc(&context_, allocs_and_refcounts_[i].alloc,
- &tensor.data.raw));
- }
- if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
- TF_LITE_ENSURE_OK(
- &context_,
- persistent_arena_.ResolveAlloc(
- &context_, allocs_and_refcounts_[i].alloc, &tensor.data.raw));
- }
- }
-
- invokable_ = true;
- next_allocate_node_id_ = new_next_allocate_node_id;
- return kTfLiteOk;
-}
-
namespace {
TfLiteIntArray* convertVectorToTfLiteIntArray(const std::vector<int>& x) {
TfLiteIntArray* lite = TfLiteIntArrayCreate(x.size());
@@ -312,11 +160,19 @@ TfLiteIntArray* convertVectorToTfLiteIntArray(const std::vector<int>& x) {
} // namespace
TfLiteStatus Interpreter::AllocateTensors() {
- next_allocate_node_id_ = 0;
- TF_LITE_ENSURE_OK(&context_, arena_.Clear());
- TF_LITE_ENSURE_OK(&context_, persistent_arena_.Clear());
- allocs_and_refcounts_.clear();
- return AllocateTensorsWhoseSizesAreKnown();
+ next_node_to_prepare_ = 0;
+ if (memory_planner_) {
+ TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocations());
+ }
+
+ if (!consistent_) {
+ ReportError(&context_, "AllocateTensors() called on inconsistent model.");
+ return kTfLiteError;
+ }
+
+ TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());
+ invokable_ = true;
+ return kTfLiteOk;
}
TfLiteStatus Interpreter::AddNodeWithParameters(
@@ -372,6 +228,57 @@ TfLiteStatus Interpreter::ResizeInputTensor(int tensor_index,
return ResizeTensorImpl(&context_.tensors[tensor_index], dims_lite);
}
+// Returns true if at least one tensor in the given list is kTfLiteDynamic.
+bool HasDynamicTensor(const TfLiteContext& context,
+ const TfLiteIntArray* tensors) {
+ for (int i = 0; i < tensors->size; ++i) {
+ const TfLiteTensor& tensor = context.tensors[tensors->data[i]];
+ if (tensor.allocation_type == kTfLiteDynamic) {
+ return true;
+ }
+ }
+ return false;
+}
+
+TfLiteStatus Interpreter::PrepareOpsStartingAt(int first_node,
+ int* last_node_prepared) {
+ for (int i = first_node; i < nodes_and_registration_.size(); i++) {
+ TfLiteNode& node = nodes_and_registration_[i].first;
+ const TfLiteRegistration& registration = nodes_and_registration_[i].second;
+ if (OpPrepare(registration, &node) == kTfLiteError) {
+ return kTfLiteError;
+ }
+
+ *last_node_prepared = i;
+
+ // Discontinue if the node has dynamic outputs. Note that we don't
+ // stop for dynamic temporary tensors since they won't affect the
+ // sizes of other tensors in the graph.
+ if (HasDynamicTensor(context_, node.outputs)) {
+ break;
+ }
+ }
+ return kTfLiteOk;
+}
+
+TfLiteStatus Interpreter::PrepareOpsAndTensors() {
+ if (!memory_planner_) {
+ memory_planner_.reset(new ArenaPlanner(
+ &context_, std::unique_ptr<GraphInfo>(new InterpreterInfo(this))));
+ memory_planner_->PlanAllocations();
+ }
+
+ int last_node_prepared = 0;
+
+ TF_LITE_ENSURE_STATUS(
+ PrepareOpsStartingAt(next_node_to_prepare_, &last_node_prepared));
+ TF_LITE_ENSURE_STATUS(memory_planner_->ExecuteAllocations(
+ next_node_to_prepare_, last_node_prepared));
+
+ next_node_to_prepare_ = last_node_prepared + 1;
+ return kTfLiteOk;
+}
+
TfLiteStatus Interpreter::Invoke() {
if (!consistent_) {
ReportError(&context_, "Invoke called on model that is not consistent.");
@@ -384,10 +291,8 @@ TfLiteStatus Interpreter::Invoke() {
TfLiteStatus status = kTfLiteOk;
if (nnapi_delegate_) {
- if (AllocateTensorsWhoseSizesAreKnown() == kTfLiteError) {
- return kTfLiteError;
- }
- if (next_allocate_node_id_ == nodes_and_registration_.size()) {
+ TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());
+ if (next_node_to_prepare_ == nodes_and_registration_.size()) {
TF_LITE_ENSURE_OK(&context_, nnapi_delegate_->Invoke(this));
return kTfLiteOk;
} else {
@@ -400,14 +305,17 @@ TfLiteStatus Interpreter::Invoke() {
}
}
+ // Invocations are always done in node order.
+ // Note that calling Invoke repeatedly will cause the original memory plan to
+ // be reused, unless either ResizeInputTensor() or AllocateTensors() has been
+ // called.
+ // TODO(b/71913981): we should force recalculation in the presence of dynamic
+ // tensors, because they may have new value which in turn may affect shapes
+ // and allocations.
for (int i = 0; i < nodes_and_registration_.size(); i++) {
- // Ensure we have allocated up to this node. The point of this is to
- // allocate as much as possible before running any evaluation, but
- // dynamic shapes can prevent this from being possible.
- if (i >= next_allocate_node_id_) {
- if (AllocateTensorsWhoseSizesAreKnown() == kTfLiteError) {
- return kTfLiteError;
- }
+ if (i == next_node_to_prepare_) {
+ TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors());
+ TF_LITE_ENSURE(&context_, next_node_to_prepare_ >= i);
}
TfLiteNode& node = nodes_and_registration_[i].first;
const TfLiteRegistration& registration = nodes_and_registration_[i].second;
diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h
index 65c61e44be..38dd402e8a 100644
--- a/tensorflow/contrib/lite/interpreter.h
+++ b/tensorflow/contrib/lite/interpreter.h
@@ -23,7 +23,7 @@ limitations under the License.
#include "tensorflow/contrib/lite/allocation.h"
#include "tensorflow/contrib/lite/context.h"
#include "tensorflow/contrib/lite/error_reporter.h"
-#include "tensorflow/contrib/lite/simple_memory_arena.h"
+#include "tensorflow/contrib/lite/memory_planner.h"
namespace tflite {
@@ -49,13 +49,6 @@ constexpr TfLiteType typeToTfLiteType<unsigned char>() {
return kTfLiteUInt8;
}
-struct ArenaAllocRefCount {
- ArenaAllocRefCount() : alloc(), count(0) {}
-
- ArenaAlloc alloc;
- int count;
-};
-
// Forward declare since NNAPIDelegate uses Interpreter.
class NNAPIDelegate;
@@ -276,9 +269,17 @@ class Interpreter {
return op_reg.invoke(&context_, node);
}
- // Allocate tensors whose sizes are known in order of nodes. Discontinue when
- // we encounter a node that has a dynamic output tensor.
- TfLiteStatus AllocateTensorsWhoseSizesAreKnown();
+ // Call OpPrepare() for as many ops as possible, allocating memory for their
+ // tensors. If an op containing dynamic tensors is found, preparation will be
+ // postponed until this function is called again. This allows the interpreter
+ // to wait until Invoke() to resolve the sizes of dynamic tensors.
+ TfLiteStatus PrepareOpsAndTensors();
+
+ // Call OpPrepare() for all ops starting at 'first_node'. Stop when a
+  // dynamic tensor is found or all ops have been prepared. Fill
+  // 'last_node_prepared' with the id of the op containing dynamic tensors, or
+  // the id of the last op in the graph.
+ TfLiteStatus PrepareOpsStartingAt(int first_node, int* last_node_prepared);
// Tensors needed by the interpreter. Use `AddTensors` to add more blank
// tensor entries. Note, `tensors_.data()` needs to be synchronized to the
@@ -325,17 +326,6 @@ class Interpreter {
std::vector<std::pair<TfLiteNode, TfLiteRegistration>>
nodes_and_registration_;
- // Raw memory buffer that is allocated for all temporary and graph outputs.
- // that are declared kTfLiteArenaRw.
- SimpleMemoryArena arena_;
-
- // Raw memory buffer that is allocated for persistent tensors that are
- // declared as kTfLiteArenaRwPersistent.
- SimpleMemoryArena persistent_arena_;
-
- // Stores allocation and reference counts of all tensors.
- std::vector<ArenaAllocRefCount> allocs_and_refcounts_;
-
// Whether the model is consistent. That is to say if the inputs and outputs
// of every node and the global inputs and outputs are valid indexes into
// the tensor array.
@@ -356,7 +346,7 @@ class Interpreter {
// The error reporter delegate that tflite will forward queries errors to.
ErrorReporter* error_reporter_;
- // Next node to allocate output tensors.
+ // Index of the next node to prepare.
// During Invoke(), Interpreter will allocate input tensors first, which are
// known to be fixed size. Then it will allocate outputs from nodes as many
// as possible. When there is a node that produces dynamic sized tensor.
@@ -364,10 +354,12 @@ class Interpreter {
// node id, and execute the node to generate the output tensor before continue
// to allocate successors. This process repeats until all nodes are executed.
// NOTE: this relies on the order of nodes that is in topological order.
- int next_allocate_node_id_;
+ int next_node_to_prepare_;
// Whether to delegate to NN API
std::unique_ptr<NNAPIDelegate> nnapi_delegate_;
+
+ std::unique_ptr<MemoryPlanner> memory_planner_;
};
} // namespace tflite
diff --git a/tensorflow/contrib/lite/memory_planner.h b/tensorflow/contrib/lite/memory_planner.h
new file mode 100644
index 0000000000..b11d86c375
--- /dev/null
+++ b/tensorflow/contrib/lite/memory_planner.h
@@ -0,0 +1,45 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MEMORY_PLANNER_H_
+#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MEMORY_PLANNER_H_
+
+#include "tensorflow/contrib/lite/context.h"
+
+namespace tflite {
+
+// A MemoryPlanner is responsible for planning and executing a number of
+// memory-related operations that are necessary in TF Lite.
+class MemoryPlanner {
+ public:
+ virtual ~MemoryPlanner() {}
+
+ // Plans the necessary memory allocations. This is the MemoryPlanner's
+ // pre-processing step and is called when the graph structure is known but
+ // actual size of the tensors is not.
+ virtual TfLiteStatus PlanAllocations() = 0;
+
+ // Allocates the necessary memory to execute all nodes in the interval
+ // [first_node, last_node].
+ virtual TfLiteStatus ExecuteAllocations(int first_node, int last_node) = 0;
+
+  // Invalidates allocations made earlier. This is called when tensor sizes
+  // have changed. All planned allocations remain, but can't be used until
+ // ExecuteAllocations() is called.
+ virtual TfLiteStatus ResetAllocations() = 0;
+};
+
+} // namespace tflite
+
+#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MEMORY_PLANNER_H_
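
For reference, the interface above is small enough that a compilable no-op implementation fits in a few lines. This is a hypothetical sketch, not part of the commit (the only real implementation added here is ArenaPlanner); it only illustrates the contract: PlanAllocations() once after the graph is built, ExecuteAllocations() for each newly prepared interval of nodes, and ResetAllocations() whenever tensor sizes change.

#include "tensorflow/contrib/lite/memory_planner.h"

namespace tflite {

// Hypothetical planner that satisfies the interface but allocates nothing;
// useful only to show the shape of the contract.
class NoopPlanner : public MemoryPlanner {
 public:
  TfLiteStatus PlanAllocations() override { return kTfLiteOk; }
  TfLiteStatus ExecuteAllocations(int first_node, int last_node) override {
    return kTfLiteOk;
  }
  TfLiteStatus ResetAllocations() override { return kTfLiteOk; }
};

}  // namespace tflite
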
diff --git a/tensorflow/contrib/lite/simple_memory_arena.h b/tensorflow/contrib/lite/simple_memory_arena.h
index 0d0b7f9ff7..07a38c4243 100644
--- a/tensorflow/contrib/lite/simple_memory_arena.h
+++ b/tensorflow/contrib/lite/simple_memory_arena.h
@@ -68,6 +68,10 @@ class SimpleMemoryArena {
TfLiteStatus Clear();
+ int64_t BasePointer() const {
+ return reinterpret_cast<int64_t>(underlying_buffer_aligned_ptr_);
+ }
+
private:
bool commited_;
size_t arena_alignment_;