author    | 2018-01-13 16:21:55 -0800
committer | 2018-01-13 16:25:51 -0800
commit    | e6ff665dbe4888aa5fdff8f34c44405acca2ddd1 (patch)
tree      | a290fe97b76bebec82ef3b3a76c906acf6b55f41 /tensorflow/contrib
parent    | a4973345351a14a786987cd7f648a99c029fdc1d (diff)
Clean up the allocation logic in the interpreter.
PiperOrigin-RevId: 181865795
Diffstat (limited to 'tensorflow/contrib')
-rw-r--r-- | tensorflow/contrib/lite/BUILD                   |  49
-rw-r--r-- | tensorflow/contrib/lite/arena_planner.cc        | 247
-rw-r--r-- | tensorflow/contrib/lite/arena_planner.h         | 107
-rw-r--r-- | tensorflow/contrib/lite/arena_planner_test.cc   | 472
-rw-r--r-- | tensorflow/contrib/lite/graph_info.h            |  53
-rw-r--r-- | tensorflow/contrib/lite/interpreter.cc          | 304
-rw-r--r-- | tensorflow/contrib/lite/interpreter.h           |  40
-rw-r--r-- | tensorflow/contrib/lite/memory_planner.h        |  45
-rw-r--r-- | tensorflow/contrib/lite/simple_memory_arena.h   |   4
9 files changed, 1096 insertions, 225 deletions
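At a high level, this commit replaces the interpreter's inline arena bookkeeping (AllocateTensorsWhoseSizesAreKnown and the per-tensor refcount vector) with a standalone MemoryPlanner interface and an ArenaPlanner implementation: the allocation/deallocation schedule is planned once from the graph structure, and the interpreter then executes slices of that plan as nodes are prepared. The following is a minimal sketch of the new call sequence, reconstructed from the interpreter changes below; it assumes a GraphInfo adapter such as the InterpreterInfo class added in this commit and a hypothetical helper name, so treat it as an illustration rather than the exact interpreter code.

```cpp
#include <memory>

#include "tensorflow/contrib/lite/arena_planner.h"
#include "tensorflow/contrib/lite/graph_info.h"

// Hypothetical helper showing the ArenaPlanner lifecycle: plan once from the
// graph, then reserve arena space for a range of nodes and resolve pointers.
TfLiteStatus PlanAndAllocate(TfLiteContext* context,
                             std::unique_ptr<tflite::GraphInfo> graph_info,
                             int num_nodes) {
  tflite::ArenaPlanner planner(context, std::move(graph_info));
  // Builds the chronological queue of tensor allocations/deallocations from
  // the graph inputs, outputs, and per-node input refcounts.
  TF_LITE_ENSURE_STATUS(planner.PlanAllocations());
  // Reserves space in the rw/persistent arenas for all tensors touched by
  // nodes [0, num_nodes - 1], commits the arenas, and rewires tensor.data.raw.
  TF_LITE_ENSURE_STATUS(planner.ExecuteAllocations(0, num_nodes - 1));
  return kTfLiteOk;
}
```

In the interpreter itself this is driven incrementally: PrepareOpsAndTensors() calls ExecuteAllocations(next_node_to_prepare_, last_node_prepared) each time op preparation stops at a node with dynamic output tensors, so the plan is consumed piece by piece during Invoke().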
diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index 3f1b0be1a7..13350c5a43 100644 --- a/tensorflow/contrib/lite/BUILD +++ b/tensorflow/contrib/lite/BUILD @@ -35,6 +35,28 @@ cc_library( hdrs = ["version.h"], ) +cc_library( + name = "arena_planner", + srcs = ["arena_planner.cc"], + hdrs = ["arena_planner.h"], + deps = [ + ":context", + ":graph_info", + ":memory_planner", + ":simple_memory_arena", + ], +) + +cc_test( + name = "arena_planner_test", + size = "small", + srcs = ["arena_planner_test.cc"], + deps = [ + ":arena_planner", + "@com_google_googletest//:gtest", + ], +) + # Main library. No ops are included here. # TODO(aselle): Resolve problems preventing C99 usage. cc_library( @@ -44,6 +66,25 @@ cc_library( ) cc_library( + name = "graph_info", + hdrs = ["graph_info.h"], + deps = [":context"], +) + +cc_library( + name = "memory_planner", + hdrs = ["memory_planner.h"], + deps = [":context"], +) + +cc_library( + name = "simple_memory_arena", + srcs = ["simple_memory_arena.cc"], + hdrs = ["simple_memory_arena.h"], + deps = [":context"], +) + +cc_library( name = "builtin_op_data", hdrs = [ "builtin_op_data.h", @@ -70,7 +111,6 @@ cc_library( "model.cc", "nnapi_delegate.cc", "optional_debug_tools.cc", - "simple_memory_arena.cc", ], hdrs = [ "allocation.h", @@ -80,13 +120,16 @@ cc_library( "model.h", "nnapi_delegate.h", "optional_debug_tools.h", - "simple_memory_arena.h", ], copts = tflite_copts(), deps = [ + ":arena_planner", ":builtin_op_data", ":context", + ":graph_info", + ":memory_planner", ":schema_fbs_version", + ":simple_memory_arena", "//tensorflow/contrib/lite/kernels:gemm_support", "//tensorflow/contrib/lite/nnapi:nnapi_lib", "//tensorflow/contrib/lite/schema:schema_fbs", @@ -134,7 +177,7 @@ cc_test( size = "small", srcs = ["simple_memory_arena_test.cc"], deps = [ - ":framework", + ":simple_memory_arena", "//tensorflow/contrib/lite/testing:util", "@com_google_googletest//:gtest", ], diff --git a/tensorflow/contrib/lite/arena_planner.cc b/tensorflow/contrib/lite/arena_planner.cc new file mode 100644 index 0000000000..bf1bcdd1a7 --- /dev/null +++ b/tensorflow/contrib/lite/arena_planner.cc @@ -0,0 +1,247 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/arena_planner.h" + +namespace tflite { + +namespace { + +// Memory allocation tuning +constexpr const int kDefaultArenaAlignment = 64; +constexpr const int kDefaultTensorAlignment = 4; + +} // namespace + +struct AllocationInfo { + // The node index requesting this allocation. + int node; + // The tensor index to be allocated or deallocated. 
+ int tensor; + // Whether to allocate or deallocate + enum { ALLOC, DEALLOC } type; +}; + +ArenaPlanner::ArenaPlanner(TfLiteContext* context, + std::unique_ptr<GraphInfo> graph_info) + : context_(context), + graph_info_(std::move(graph_info)), + arena_(kDefaultArenaAlignment), + persistent_arena_(kDefaultArenaAlignment) {} + +ArenaPlanner::~ArenaPlanner() {} + +int64_t ArenaPlanner::BasePointer(TfLiteAllocationType type) { + if (type == kTfLiteArenaRwPersistent) { + return persistent_arena_.BasePointer(); + } + if (type == kTfLiteArenaRw) { + return arena_.BasePointer(); + } + return 0; +} + +TfLiteStatus ArenaPlanner::ResetAllocations() { + TF_LITE_ENSURE_STATUS(arena_.Clear()); + TF_LITE_ENSURE_STATUS(persistent_arena_.Clear()); + allocs_.clear(); + allocs_.resize(graph_info_->num_tensors()); + return kTfLiteOk; +} + +TfLiteStatus ArenaPlanner::PlanAllocations() { + // Invalidate any existing data. + TF_LITE_ENSURE_STATUS(ResetAllocations()); + + // Keeps track of references to each tensor. + std::vector<int> refcounts(graph_info_->num_tensors(), 0); + + // There will be an entry in alloc_queue_ for the allocation of each tensor + // and another for their deallocation. + alloc_queue_.reserve(2 * graph_info_->num_tensors()); + + // We must make sure the output tensors are never overwritten. We do that by + // artificially adding one to their ref-counts so they are never selected + // for deallocation. + for (int tensor_index : graph_info_->outputs()) { + refcounts[tensor_index]++; + } + + // Count references to node input tensors. + for (int i = 0; i < graph_info_->num_nodes(); ++i) { + const TfLiteNode& node = graph_info_->node(i); + TfLiteIntArray* node_inputs = node.inputs; + for (int j = 0; j < node_inputs->size; ++j) { + int tensor_index = node_inputs->data[j]; + if (tensor_index != kOptionalTensor) { + refcounts[tensor_index]++; + } + } + } + + // Queue all graph inputs for allocation. + for (int tensor_index : graph_info_->inputs()) { + if (tensor_index != kOptionalTensor) { + alloc_queue_.push_back({0, tensor_index, AllocationInfo::ALLOC}); + } + } + + // Go through the graph in execution order. + for (int i = 0; i < graph_info_->num_nodes(); ++i) { + const TfLiteNode& node = graph_info_->node(i); + + // First queue output tensors for allocation. + TfLiteIntArray* node_outputs = node.outputs; + for (int j = 0; j < node_outputs->size; ++j) { + int tensor_index = node_outputs->data[j]; + alloc_queue_.push_back({i, tensor_index, AllocationInfo::ALLOC}); + } + + // Then update the ref-counts of the node's inputs, and if necessary queue + // them for deallocation. + TfLiteIntArray* node_inputs = node.inputs; + for (int j = 0; j < node_inputs->size; ++j) { + int tensor_index = node_inputs->data[j]; + if (tensor_index != kOptionalTensor) { + refcounts[tensor_index]--; + if (refcounts[tensor_index] == 0) { + alloc_queue_.push_back({i, tensor_index, AllocationInfo::DEALLOC}); + } + } + } + } + + // Note that graph outputs will never be scheduled for deallocation. We + // could do that here for completeness, but it won't have any effect. + return kTfLiteOk; +} + +TfLiteStatus ArenaPlanner::ExecuteAllocations(int first_node, int last_node) { + TF_LITE_ENSURE_STATUS(CalculateAllocations(first_node, last_node)); + TF_LITE_ENSURE_STATUS(Commit()); + + for (int i = 0; i < graph_info_->num_tensors(); ++i) { + // TODO(ahentz): we could do this only for the tensors that were modified + // in CalculateAllocations(), instead of redoing it for tensors that + // already had proper pointers. 
However we must be very careful, because + // SimpleMemoryArena::Commit() could move the base pointer. + TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i)); + } + + return kTfLiteOk; +} + +TfLiteStatus ArenaPlanner::Commit() { + TF_LITE_ENSURE_STATUS(arena_.Commit(context_)); + TF_LITE_ENSURE_STATUS(persistent_arena_.Commit(context_)); + return kTfLiteOk; +} + +TfLiteStatus ArenaPlanner::CalculateAllocations(int first_node, int last_node) { + int active_node = first_node; + // When dynamic tensors are present this method is called multiple times. + // The items in the alloc_queue_ referring to nodes before first_node were + // processed previously and should be skipped. Entries after last_node are + // not yet ready to be handled. + for (const auto& alloc_info : alloc_queue_) { + if (alloc_info.node < first_node) continue; + if (alloc_info.node > last_node) break; + if (alloc_info.node == active_node) { + // This is the first allocation/deallocation for a given node. It is + // time to deallocate the previous temporaries and allocate new ones. + if (active_node != first_node) { + TF_LITE_ENSURE_STATUS( + CalculateDeallocationOfInternalTensors(active_node - 1)); + } + TF_LITE_ENSURE_STATUS(CalculateAllocationOfInternalTensors(active_node)); + ++active_node; + } + // Handle the current item. + if (alloc_info.type == AllocationInfo::ALLOC) { + TF_LITE_ENSURE_STATUS(CalculateTensorAllocation(alloc_info.tensor)); + } else { + TF_LITE_ENSURE_STATUS(CalculateTensorDeallocation(alloc_info.tensor)); + } + } + + // Don't forget to deallocate temporaries of last node. + TF_LITE_ENSURE_STATUS( + CalculateDeallocationOfInternalTensors(active_node - 1)); + + return kTfLiteOk; +} + +TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int tensor_index) { + TfLiteTensor& tensor = *graph_info_->tensor(tensor_index); + if (tensor.allocation_type == kTfLiteArenaRw) { + TF_LITE_ENSURE_STATUS( + arena_.ResolveAlloc(context_, allocs_[tensor_index], &tensor.data.raw)); + } + if (tensor.allocation_type == kTfLiteArenaRwPersistent) { + TF_LITE_ENSURE_STATUS(persistent_arena_.ResolveAlloc( + context_, allocs_[tensor_index], &tensor.data.raw)); + } + return kTfLiteOk; +} + +TfLiteStatus ArenaPlanner::CalculateTensorAllocation(int tensor_index) { + TfLiteTensor& tensor = *graph_info_->tensor(tensor_index); + if (tensor.allocation_type == kTfLiteArenaRw) { + TF_LITE_ENSURE_STATUS(arena_.Allocate(context_, kDefaultTensorAlignment, + tensor.bytes, + &allocs_[tensor_index])); + } + if (tensor.allocation_type == kTfLiteArenaRwPersistent) { + TF_LITE_ENSURE_STATUS( + persistent_arena_.Allocate(context_, kDefaultTensorAlignment, + tensor.bytes, &allocs_[tensor_index])); + } + return kTfLiteOk; +} + +TfLiteStatus ArenaPlanner::CalculateTensorDeallocation(int tensor_index) { + TfLiteTensor& tensor = *graph_info_->tensor(tensor_index); + if (tensor.allocation_type == kTfLiteArenaRw) { + TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[tensor_index])); + } + return kTfLiteOk; +} + +TfLiteStatus ArenaPlanner::CalculateAllocationOfInternalTensors( + int node_index) { + if (node_index < graph_info_->num_nodes()) { + const TfLiteNode& node = graph_info_->node(node_index); + TfLiteIntArray* node_temporaries = node.temporaries; + for (int i = 0; i < node_temporaries->size; ++i) { + int tensor_index = node_temporaries->data[i]; + TF_LITE_ENSURE_STATUS(CalculateTensorAllocation(tensor_index)); + } + } + return kTfLiteOk; +} + +TfLiteStatus ArenaPlanner::CalculateDeallocationOfInternalTensors( + int node_index) { + if (node_index 
< graph_info_->num_nodes()) { + const TfLiteNode& node = graph_info_->node(node_index); + TfLiteIntArray* node_temporaries = node.temporaries; + for (int i = 0; i < node_temporaries->size; ++i) { + int tensor_index = node_temporaries->data[i]; + TF_LITE_ENSURE_STATUS(CalculateTensorDeallocation(tensor_index)); + } + } + return kTfLiteOk; +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/arena_planner.h b/tensorflow/contrib/lite/arena_planner.h new file mode 100644 index 0000000000..bd87414ec3 --- /dev/null +++ b/tensorflow/contrib/lite/arena_planner.h @@ -0,0 +1,107 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_ARENA_PLANNER_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_ARENA_PLANNER_H_ + +#include <memory> +#include <vector> + +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/graph_info.h" +#include "tensorflow/contrib/lite/memory_planner.h" +#include "tensorflow/contrib/lite/simple_memory_arena.h" + +namespace tflite { + +class AllocationInfo; + +// A memory planner that makes all the allocations using arenas. +// +// Before a model is executed by the interpreter, this class determines when +// each tensor needs to be allocated and deallocated, and preallocates all the +// necessary memory (the PlanAllocations phase). It then assigns portions of +// this memory buffer to each tensor (the ExecuteAllocations phase). Tensors may +// share some of the bufer if a tensor B is to be allocated after another tensor +// A has been deallocated. +// +// If dynamic tensors are used the planning steps can be repeated during model +// execution. Since dynamic tensors don't have sizes until after the +// corresponding operation is executed, this class supports incremental +// planning. +class ArenaPlanner : public MemoryPlanner { + public: + // Ownership of 'context' is not taken and it must remain util the + // ArenaPlanner is destroyed. + ArenaPlanner(TfLiteContext* context, std::unique_ptr<GraphInfo> graph_info); + ~ArenaPlanner() override; + ArenaPlanner(const ArenaPlanner&) = delete; + ArenaPlanner& operator=(const ArenaPlanner&) = delete; + + TfLiteStatus ResetAllocations() override; + TfLiteStatus PlanAllocations() override; + TfLiteStatus ExecuteAllocations(int first_node, int last_node) override; + + // Returns the base arena location for a given allocation type. + int64_t BasePointer(TfLiteAllocationType type); + + private: + // Make sure all the arenas have reserved enough memory to store all their + // tensors. + TfLiteStatus Commit(); + + // Traverse the allocation queue and reserve space in the appropriate arena + // for all tensors affected by ops in the interval [first_node, last_node]. 
+ TfLiteStatus CalculateAllocations(int first_node, int last_node); + + // Assign absolute memory location to a tensor, based on its relative + // position inside the corresponding arena buffer. + TfLiteStatus ResolveTensorAllocation(int tensor_index); + + // Register an allocation for the given tensor. + TfLiteStatus CalculateTensorAllocation(int tensor_index); + + // Register a deallocation for the given tensor. + TfLiteStatus CalculateTensorDeallocation(int tensor_index); + + // Register an allocation for all internal (temporary) tensors of + // 'node_index'. + TfLiteStatus CalculateAllocationOfInternalTensors(int node_index); + + // Register a deallocation for all internal (temporary) tensors of + // 'node_index'. + TfLiteStatus CalculateDeallocationOfInternalTensors(int node_index); + + TfLiteContext* context_; + std::unique_ptr<GraphInfo> graph_info_; + + // Stores allocation data for all tensors. + std::vector<ArenaAlloc> allocs_; + + // A chronological list of instructions to allocated and deallocate tensors, + // reflecting the way they are used in the graph. + std::vector<AllocationInfo> alloc_queue_; + + // Raw memory buffer that is allocated for all temporary and graph outputs. + // that are declared kTfLiteArenaRw. + SimpleMemoryArena arena_; + + // Raw memory buffer that is allocated for persistent tensors that are + // declared as kTfLiteArenaRwPersistent. + SimpleMemoryArena persistent_arena_; +}; + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_ARENA_PLANNER_H_ diff --git a/tensorflow/contrib/lite/arena_planner_test.cc b/tensorflow/contrib/lite/arena_planner_test.cc new file mode 100644 index 0000000000..c27c327abc --- /dev/null +++ b/tensorflow/contrib/lite/arena_planner_test.cc @@ -0,0 +1,472 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/arena_planner.h" + +#include <cstdarg> + +#include <gmock/gmock.h> +#include <gtest/gtest.h> + +namespace tflite { +namespace { + +// A simple op to be used in tests, as syntactic sugar. +class TestOp { + public: + TestOp(std::initializer_list<int> inputs, std::initializer_list<int> outputs, + std::initializer_list<int> temporaries) + : inputs_(inputs), outputs_(outputs), temporaries_(temporaries) {} + + const std::vector<int>& inputs() const { return inputs_; } + const std::vector<int>& outputs() const { return outputs_; } + const std::vector<int>& temporaries() const { return temporaries_; } + + private: + std::vector<int> inputs_; + std::vector<int> outputs_; + std::vector<int> temporaries_; +}; + +// A test graph where inputs are processed by the given nodes to produce +// outputs. 
+class TestGraph { + public: + TestGraph(std::initializer_list<int> inputs, + std::initializer_list<TestOp> nodes, + std::initializer_list<int> outputs) + : inputs_(inputs), outputs_(outputs) { + int max_tensor_index = 0; + + for (int t : inputs) { + max_tensor_index = std::max(max_tensor_index, t); + } + for (int t : outputs) { + max_tensor_index = std::max(max_tensor_index, t); + } + for (const auto& node : nodes) { + auto int_array = [](const std::vector<int>& x) { + TfLiteIntArray* lite = TfLiteIntArrayCreate(x.size()); + for (size_t i = 0; i < x.size(); i++) lite->data[i] = x[i]; + return lite; + }; + + nodes_.push_back(TfLiteNode()); + nodes_.back().inputs = int_array(node.inputs()); + for (int t : node.inputs()) { + max_tensor_index = std::max(max_tensor_index, t); + } + nodes_.back().outputs = int_array(node.outputs()); + for (int t : node.outputs()) { + max_tensor_index = std::max(max_tensor_index, t); + } + nodes_.back().temporaries = int_array(node.temporaries()); + for (int t : node.temporaries()) { + max_tensor_index = std::max(max_tensor_index, t); + } + } + + for (int i = 0; i <= max_tensor_index; ++i) { + tensors_.push_back(TfLiteTensor()); + // Set some default values for allocation_type and bytes, which are the + // only fields used by the arena planner. + tensors_.back().allocation_type = kTfLiteArenaRw; + tensors_.back().bytes = (i + 1) * 3; + } + } + + ~TestGraph() { + for (auto node : nodes_) { + TfLiteIntArrayFree(node.inputs); + TfLiteIntArrayFree(node.outputs); + TfLiteIntArrayFree(node.temporaries); + } + } + + const std::vector<TfLiteNode>& nodes() { return nodes_; } + std::vector<TfLiteTensor>* tensors() { return &tensors_; } + const std::vector<int>& inputs() { return inputs_; } + const std::vector<int>& outputs() { return outputs_; } + + private: + std::vector<TfLiteNode> nodes_; + std::vector<TfLiteTensor> tensors_; + std::vector<int> inputs_; + std::vector<int> outputs_; +}; + +// The GraphInfo for a TestGraph. +class TestGraphInfo : public GraphInfo { + public: + explicit TestGraphInfo(TestGraph* graph) : graph_(graph) {} + + size_t num_tensors() const override { return graph_->tensors()->size(); } + TfLiteTensor* tensor(size_t index) override { + return &graph_->tensors()->at(index); + } + size_t num_nodes() const override { return graph_->nodes().size(); } + const TfLiteNode& node(size_t index) const override { + return graph_->nodes()[index]; + } + const std::vector<int>& inputs() const override { return graph_->inputs(); } + const std::vector<int>& outputs() const override { return graph_->outputs(); } + + private: + TestGraph* graph_; +}; + +void ReportError(TfLiteContext* context, const char* format, ...) { + const size_t kBufferSize = 1024; + char temp_buffer[kBufferSize]; + + va_list args; + va_start(args, format); + vsnprintf(temp_buffer, kBufferSize, format, args); + va_end(args); + + LOG(INFO) << temp_buffer; +} + +class ArenaPlannerTest : public ::testing::Test { + protected: + void SetGraph(TestGraph* graph) { + graph_ = graph; + context_.ReportError = ReportError; + planner_.reset(new ArenaPlanner( + &context_, std::unique_ptr<GraphInfo>(new TestGraphInfo(graph)))); + CHECK(planner_->ResetAllocations() == kTfLiteOk); + CHECK(planner_->PlanAllocations() == kTfLiteOk); + } + + void Execute(int start, int end) { + CHECK(planner_->ExecuteAllocations(start, end) == kTfLiteOk); + } + + // Returns the actual offset of a given tensor, relative to the start of its + // arena. 
+ int64_t GetOffset(int tensor_index) { + const TfLiteTensor& tensor = (*graph_->tensors())[tensor_index]; + return reinterpret_cast<int64_t>(tensor.data.raw) - + planner_->BasePointer(tensor.allocation_type); + } + + // Returns the first aligned offset after a given tensor. + int64_t GetOffsetAfter(int tensor_index) { + const TfLiteTensor& tensor = (*graph_->tensors())[tensor_index]; + int64_t offset = GetOffset(tensor_index) + tensor.bytes; + // We must make sure the offset is aligned to kDefaultArenaAlignment. + if (offset % 4 != 0) { + offset += 4 - offset % 4; + } + return offset; + }; + + TfLiteContext context_; + TestGraph* graph_; + std::unique_ptr<ArenaPlanner> planner_; +}; + +TEST_F(ArenaPlannerTest, EmptyGraph) { + TestGraph graph({}, {}, {}); + SetGraph(&graph); + Execute(0, 10); +} + +TEST_F(ArenaPlannerTest, GraphWithNoOps) { + TestGraph graph({0, 10}, {}, {5, 11}); + SetGraph(&graph); + Execute(0, 10); + EXPECT_EQ(GetOffset(0), 0); + EXPECT_EQ(GetOffset(10), GetOffsetAfter(0)); + // The outputs are never allocated because they are not connected to any + // inputs. + EXPECT_EQ(GetOffset(5), 0); + EXPECT_EQ(GetOffset(11), 0); +} + +TEST_F(ArenaPlannerTest, GraphWithOneOp) { + TestGraph graph({1}, {{{1}, {2}, {}}}, {2}); + SetGraph(&graph); + Execute(0, 10); + EXPECT_EQ(GetOffset(1), 0); + EXPECT_EQ(GetOffset(2), GetOffsetAfter(1)); +} + +TEST_F(ArenaPlannerTest, ZeroSizedTensors) { + TestGraph graph({1}, {{{1}, {2}, {}}}, {2}); + (*graph.tensors())[1].bytes = 0; + SetGraph(&graph); + // TODO(ahentz): this is currently broken because the arena finds two + // allocations with the same offset and returns an error. + ASSERT_FALSE(planner_->ExecuteAllocations(0, 10) == kTfLiteOk); + // EXPECT_EQ(GetOffset(1), 0); + // EXPECT_EQ(GetOffset(2), GetOffsetAfter(1)); +} + +TEST_F(ArenaPlannerTest, SimpleGraph) { + TestGraph graph({0, 1}, + { + /* in, out, tmp */ + {{0, 1}, {2}, {}}, // First op + {{2, 0}, {4, 5}, {}}, // Second op + {{4, 5}, {3}, {}} // Third op + }, + {3}); + SetGraph(&graph); + Execute(0, 10); + + // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +4 +5 -2 -0 +3 -4 -5 + EXPECT_EQ(GetOffset(0), 0); + EXPECT_EQ(GetOffset(1), GetOffsetAfter(0)); + EXPECT_EQ(GetOffset(2), GetOffsetAfter(1)); + EXPECT_EQ(GetOffset(4), GetOffsetAfter(2)); + EXPECT_EQ(GetOffset(5), GetOffsetAfter(4)); + EXPECT_EQ(GetOffset(3), 0); +} + +TEST_F(ArenaPlannerTest, SimpleGraphWithTemporary) { + TestGraph graph({0, 1}, + { + /* in, out, tmp */ + {{0, 1}, {2}, {}}, // First op + {{2, 0}, {4}, {5}}, // Second op, with temporary + {{4}, {3}, {}} // Third op + }, + {3}); + SetGraph(&graph); + Execute(0, 10); + + // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4 + EXPECT_EQ(GetOffset(0), 0); + EXPECT_EQ(GetOffset(1), GetOffsetAfter(0)); + EXPECT_EQ(GetOffset(2), GetOffsetAfter(1)); + EXPECT_EQ(GetOffset(5), GetOffsetAfter(2)); + EXPECT_EQ(GetOffset(4), GetOffsetAfter(5)); + EXPECT_EQ(GetOffset(3), 0); +} + +TEST_F(ArenaPlannerTest, SimpleGraphWithOptionals) { + TestGraph graph({0, -1, 1}, + { + /* in, out, tmp */ + {{0, 1}, {2}, {}}, // First op + {{2, 0}, {4, 5}, {}}, // Second op + {{4, -1, 5}, {3}, {}} // Third op, with optional + }, + {3}); + SetGraph(&graph); + Execute(0, 10); + + // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +4 +5 -2 -0 +3 -4 -5 + EXPECT_EQ(GetOffset(0), 0); + EXPECT_EQ(GetOffset(1), GetOffsetAfter(0)); + EXPECT_EQ(GetOffset(2), GetOffsetAfter(1)); + EXPECT_EQ(GetOffset(4), GetOffsetAfter(2)); + EXPECT_EQ(GetOffset(5), GetOffsetAfter(4)); + EXPECT_EQ(GetOffset(3), 
0); +} + +TEST_F(ArenaPlannerTest, SimpleGraphWithLargeTensor) { + TestGraph graph({0, -1, 1}, + { + /* in, out, tmp */ + {{0, 1}, {2}, {}}, // First op + {{2, 0}, {4}, {5}}, // Second op, with temporary + {{4, -1}, {3}, {}} // Third op, with optional + }, + {3}); + + // Make #1 very large so its vacancy can be filled with #5 and #4. + (*graph.tensors())[1].bytes = 40; + + SetGraph(&graph); + Execute(0, 10); + + // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4 + EXPECT_EQ(GetOffset(0), 0); + EXPECT_EQ(GetOffset(1), GetOffsetAfter(0)); + EXPECT_EQ(GetOffset(2), GetOffsetAfter(1)); + EXPECT_EQ(GetOffset(5), GetOffsetAfter(0)); + EXPECT_EQ(GetOffset(4), GetOffsetAfter(5)); + EXPECT_EQ(GetOffset(3), 0); +} + +TEST_F(ArenaPlannerTest, SimpleGraphWithPersistentTensor) { + TestGraph graph({0, -1, 1}, + { + /* in, out, tmp */ + {{0, 1}, {2}, {}}, // First op + {{2, 0}, {4}, {5}}, // Second op, with temporary + {{4, -1}, {3}, {}} // Third op, with optional + }, + {3}); + + // Make #1 persistent so it goes into its own arena. + (*graph.tensors())[1].allocation_type = kTfLiteArenaRwPersistent; + + SetGraph(&graph); + Execute(0, 10); + + // Make sure #0 and #1 were given different memory locations (because they + // will both have offset=0, in different arenas.) + EXPECT_NE((*graph.tensors())[0].data.raw, (*graph.tensors())[1].data.raw); + + // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4 + EXPECT_EQ(GetOffset(0), 0); + EXPECT_EQ(GetOffset(1), 0); + EXPECT_EQ(GetOffset(2), GetOffsetAfter(0)); + EXPECT_EQ(GetOffset(5), GetOffsetAfter(2)); + EXPECT_EQ(GetOffset(4), GetOffsetAfter(5)); + EXPECT_EQ(GetOffset(3), 0); +} + +TEST_F(ArenaPlannerTest, SimpleGraphWithDynamicTensor) { + TestGraph graph({0, -1, 1}, + { + /* in, out, tmp */ + {{0, 1}, {2}, {}}, // First op + {{2, 0}, {4}, {5}}, // Second op, with temporary + {{4, -1}, {3}, {}} // Third op, with optional + }, + {3}); + + // Make #1 dynaic so it does not get allocated. + (*graph.tensors())[1].allocation_type = kTfLiteDynamic; + + SetGraph(&graph); + Execute(0, 10); + + EXPECT_EQ((*graph.tensors())[1].data.raw, nullptr); + + // Alloc(+) and dealloc(-) order: +0 +1 +2 -1 +5 +4 -2 -0 -5 +3 -4 + EXPECT_EQ(GetOffset(0), 0); + EXPECT_EQ(GetOffset(2), GetOffsetAfter(0)); + EXPECT_EQ(GetOffset(5), GetOffsetAfter(2)); + EXPECT_EQ(GetOffset(4), GetOffsetAfter(5)); + EXPECT_EQ(GetOffset(3), 0); +} + +TEST_F(ArenaPlannerTest, LargerGraphAndStepwiseAllocation) { + TestGraph graph({0, 1}, + { + /* in, out, tmp */ + {{0, 1}, {2, 3}, {}}, + {{2, 0}, {4, 5}, {6}}, + {{1, -1}, {7}, {}}, + {{7, 3}, {8}, {9}}, + {{4, 5, 8}, {10}, {}}, + }, + {10}); + SetGraph(&graph); + + auto is_unallocated = [&](int tensor_index) { + // TODO(ahentz): We'd to use nullptr to represent unallocated tensors, but + // the current code still points them all to the beginning fo the alloc + // (that is, zero offset). + // return (*graph.tensors())[tensor_index].data.raw == nullptr; + return GetOffset(tensor_index) == 0; + }; + + // The allocation plan is made at the beginning and is independent of + // the execution steps. 
Here's the allocation order: + // Op0: +0 +1 +2 +3 + // Op1: +6 +4 +5 -6 -0 -2 + // Op2: +7 -1 + // Op3: +9 +8 -9 -3 -7 + // Op4: +10 -4 -5 -8 + + Execute(0, 0); + EXPECT_EQ(GetOffset(0), 0); + EXPECT_EQ(GetOffset(1), GetOffsetAfter(0)); + EXPECT_EQ(GetOffset(2), GetOffsetAfter(1)); + EXPECT_EQ(GetOffset(3), GetOffsetAfter(2)); + EXPECT_TRUE(is_unallocated(6)); + EXPECT_TRUE(is_unallocated(4)); + EXPECT_TRUE(is_unallocated(5)); + EXPECT_TRUE(is_unallocated(7)); + EXPECT_TRUE(is_unallocated(9)); + EXPECT_TRUE(is_unallocated(8)); + EXPECT_TRUE(is_unallocated(10)); + + Execute(1, 1); + EXPECT_EQ(GetOffset(0), 0); + EXPECT_EQ(GetOffset(1), GetOffsetAfter(0)); + EXPECT_EQ(GetOffset(2), GetOffsetAfter(1)); + EXPECT_EQ(GetOffset(3), GetOffsetAfter(2)); + EXPECT_EQ(GetOffset(6), GetOffsetAfter(3)); + EXPECT_EQ(GetOffset(4), GetOffsetAfter(6)); + EXPECT_EQ(GetOffset(5), GetOffsetAfter(4)); + EXPECT_TRUE(is_unallocated(7)); + EXPECT_TRUE(is_unallocated(9)); + EXPECT_TRUE(is_unallocated(8)); + EXPECT_TRUE(is_unallocated(10)); + + Execute(2, 2); + EXPECT_EQ(GetOffset(0), 0); + EXPECT_EQ(GetOffset(1), GetOffsetAfter(0)); + EXPECT_EQ(GetOffset(2), GetOffsetAfter(1)); + EXPECT_EQ(GetOffset(3), GetOffsetAfter(2)); + EXPECT_EQ(GetOffset(6), GetOffsetAfter(3)); + EXPECT_EQ(GetOffset(4), GetOffsetAfter(6)); + EXPECT_EQ(GetOffset(5), GetOffsetAfter(4)); + // Here's an interesting allocation. Even though #6 requires only 21 bytes, + // its deallocation freed up 24 bytes due to the alignment requirements in + // the arena. That means we can fit #7 in the same space! + EXPECT_EQ(GetOffset(7), GetOffsetAfter(3)); + EXPECT_TRUE(is_unallocated(9)); + EXPECT_TRUE(is_unallocated(8)); + EXPECT_TRUE(is_unallocated(10)); + + Execute(3, 3); + EXPECT_EQ(GetOffset(0), 0); + EXPECT_EQ(GetOffset(1), GetOffsetAfter(0)); + EXPECT_EQ(GetOffset(2), GetOffsetAfter(1)); + EXPECT_EQ(GetOffset(3), GetOffsetAfter(2)); + EXPECT_EQ(GetOffset(6), GetOffsetAfter(3)); + EXPECT_EQ(GetOffset(4), GetOffsetAfter(6)); + EXPECT_EQ(GetOffset(5), GetOffsetAfter(4)); + EXPECT_EQ(GetOffset(7), GetOffsetAfter(3)); + // The deallocation of #0, #1 and #2 freed up 24 bytes but that's not enough + // for #9, so it goes at the end. + EXPECT_EQ(GetOffset(9), GetOffsetAfter(5)); + EXPECT_EQ(GetOffset(8), GetOffsetAfter(9)); + EXPECT_TRUE(is_unallocated(10)); + + Execute(4, 4); + EXPECT_EQ(GetOffset(0), 0); + EXPECT_EQ(GetOffset(1), GetOffsetAfter(0)); + EXPECT_EQ(GetOffset(2), GetOffsetAfter(1)); + EXPECT_EQ(GetOffset(3), GetOffsetAfter(2)); + EXPECT_EQ(GetOffset(6), GetOffsetAfter(3)); + EXPECT_EQ(GetOffset(4), GetOffsetAfter(6)); + EXPECT_EQ(GetOffset(5), GetOffsetAfter(4)); + EXPECT_EQ(GetOffset(7), GetOffsetAfter(3)); + EXPECT_EQ(GetOffset(9), GetOffsetAfter(5)); + EXPECT_EQ(GetOffset(8), GetOffsetAfter(9)); + // There's just enough space at the beginning for #10 due to the + // deallocation of #0, #1, #2 and #3 (total 36 bytes, #10 needs + // only 33.) + EXPECT_EQ(GetOffset(10), 0); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + // ::tflite::LogToStderr(); + FLAGS_logtostderr = true; + + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/graph_info.h b/tensorflow/contrib/lite/graph_info.h new file mode 100644 index 0000000000..5481aede60 --- /dev/null +++ b/tensorflow/contrib/lite/graph_info.h @@ -0,0 +1,53 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_GRAPH_INFO_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_GRAPH_INFO_H_ + +#include <vector> + +#include "tensorflow/contrib/lite/context.h" + +namespace tflite { + +// Basic information about an inference graph, where execution nodes +// are connected via tensors. +class GraphInfo { + public: + virtual ~GraphInfo() {} + + // Total number of tensors in the graph. + virtual size_t num_tensors() const = 0; + + // Returns a tensor given its index which is expected to be between 0 and + // num_tensors(). + virtual TfLiteTensor* tensor(size_t index) = 0; + + // Total number of nodes in the graph. + virtual size_t num_nodes() const = 0; + + // Returns a node given its index which is expected to be between 0 and + // num_nodes(). + virtual const TfLiteNode& node(size_t index) const = 0; + + // Returns the indices of the input tensors. + virtual const std::vector<int>& inputs() const = 0; + + // Returns the indices of the output tensors. + virtual const std::vector<int>& outputs() const = 0; +}; + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_GRAPH_INFO_H_ diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 954e236ac8..5f5981e45a 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -18,16 +18,16 @@ limitations under the License. #include <cstdarg> #include <cstdint> #include <cstring> +#include "tensorflow/contrib/lite/arena_planner.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/error_reporter.h" +#include "tensorflow/contrib/lite/graph_info.h" #include "tensorflow/contrib/lite/kernels/gemm_support.h" +#include "tensorflow/contrib/lite/memory_planner.h" #include "tensorflow/contrib/lite/nnapi_delegate.h" namespace { -// Memory allocation tuning -constexpr const int kDefaultArenaAlignment = 64; -constexpr const int kDefaultTensorAlignment = 4; // std::vector preallocation tuning. constexpr const int kSlotsToReserve = 128; @@ -35,10 +35,33 @@ constexpr const int kSlotsToReserve = 128; namespace tflite { +// A trivial implementation of GraphInfo around the Interpreter. 
+class InterpreterInfo : public GraphInfo { + public: + explicit InterpreterInfo(Interpreter* interpreter) + : interpreter_(interpreter) {} + + size_t num_tensors() const override { return interpreter_->tensors_size(); } + TfLiteTensor* tensor(size_t index) override { + return interpreter_->tensor(index); + } + size_t num_nodes() const override { return interpreter_->nodes_size(); } + const TfLiteNode& node(size_t index) const override { + return interpreter_->node_and_registration(index)->first; + } + const std::vector<int>& inputs() const override { + return interpreter_->inputs(); + } + const std::vector<int>& outputs() const override { + return interpreter_->outputs(); + } + + public: + Interpreter* interpreter_; +}; + Interpreter::Interpreter(ErrorReporter* error_reporter) - : arena_(kDefaultArenaAlignment), - persistent_arena_(kDefaultArenaAlignment), - error_reporter_(error_reporter ? error_reporter + : error_reporter_(error_reporter ? error_reporter : DefaultErrorReporter()) { context_.impl_ = static_cast<void*>(this); context_.ResizeTensor = ResizeTensor; @@ -50,7 +73,7 @@ Interpreter::Interpreter(ErrorReporter* error_reporter) // Reserve some space for the tensors to avoid excessive resizing. tensors_.reserve(kSlotsToReserve); nodes_and_registration_.reserve(kSlotsToReserve); - next_allocate_node_id_ = 0; + next_node_to_prepare_ = 0; UseNNAPI(false); } @@ -128,181 +151,6 @@ TfLiteStatus Interpreter::BytesRequired(TfLiteType type, const int* dims, return kTfLiteOk; } -TfLiteStatus Interpreter::AllocateTensorsWhoseSizesAreKnown() { - if (!consistent_) { - ReportError(&context_, "AllocateTensors() called on inconsistent model."); - return kTfLiteError; - } - if (next_allocate_node_id_ == nodes_and_registration_.size() && invokable_) { - return kTfLiteOk; - } - allocs_and_refcounts_.resize(context_.tensors_size); - - int new_next_allocate_node_id = next_allocate_node_id_; - invokable_ = false; - - // Allocate graph input nodes. - if (next_allocate_node_id_ == 0) { - for (int i = 0; i < inputs_.size(); ++i) { - int tensor_index = inputs_[i]; - if (tensor_index == kOptionalTensor) { - continue; - } - TfLiteTensor& tensor = context_.tensors[tensor_index]; - if (tensor.allocation_type == kTfLiteArenaRw) { - TF_LITE_ENSURE_OK( - &context_, - arena_.Allocate(&context_, kDefaultTensorAlignment, tensor.bytes, - &allocs_and_refcounts_[tensor_index].alloc)); - } - } - // Add 1 to output tensors, so they will not get overwritten. - for (int i = 0; i < outputs_.size(); ++i) { - allocs_and_refcounts_[outputs_[i]].count++; - } - } - - // Count references to node input tensors, and resize node-referenced tensors - // until we encounter a node that has a dynamic output tensor. - for (int k = next_allocate_node_id_; k < nodes_and_registration_.size(); - k++) { - new_next_allocate_node_id++; - TfLiteNode& node = nodes_and_registration_[k].first; - const TfLiteRegistration& registration = nodes_and_registration_[k].second; - if (OpPrepare(registration, &node) == kTfLiteError) { - return kTfLiteError; - } - - TfLiteIntArray* node_inputs = node.inputs; - for (int i = 0; i < node_inputs->size; ++i) { - int tensor_index = node_inputs->data[i]; - if (tensor_index != kOptionalTensor) { - allocs_and_refcounts_[node_inputs->data[i]].count++; - } - } - - // Discontinue if the node has dynamic outputs. 
- bool has_unallocated_dynamic_tensor = false; - TfLiteIntArray* node_outputs = node.outputs; - for (int i = 0; i < node_outputs->size; ++i) { - TfLiteTensor& tensor = context_.tensors[node_outputs->data[i]]; - if (tensor.allocation_type == kTfLiteDynamic) { - has_unallocated_dynamic_tensor = true; - break; - } - } - if (has_unallocated_dynamic_tensor) { - break; - } - } - - // Allocate graph persistent outputs, e.g. RNN cell states, etc. - for (int k = next_allocate_node_id_; k < new_next_allocate_node_id; k++) { - TfLiteNode& node = nodes_and_registration_[k].first; - - // Go through output tensors and allocate the persistent ones first. - TfLiteIntArray* node_outputs = node.outputs; - for (int i = 0; i < node_outputs->size; ++i) { - int tensor_index = node_outputs->data[i]; - TfLiteTensor& tensor = context_.tensors[tensor_index]; - if (tensor.allocation_type == kTfLiteArenaRwPersistent) { - TF_LITE_ENSURE_OK(&context_, - persistent_arena_.Allocate( - &context_, kDefaultTensorAlignment, tensor.bytes, - &allocs_and_refcounts_[tensor_index].alloc)); - } - } - } - - // Go through the graph in execution order. - for (int k = next_allocate_node_id_; k < new_next_allocate_node_id; k++) { - TfLiteNode& node = nodes_and_registration_[k].first; - - // First allocate output tensors. - TfLiteIntArray* node_outputs = node.outputs; - for (int i = 0; i < node_outputs->size; ++i) { - int tensor_index = node_outputs->data[i]; - TfLiteTensor& tensor = context_.tensors[tensor_index]; - if (tensor.allocation_type == kTfLiteArenaRw) { - TF_LITE_ENSURE_OK( - &context_, - arena_.Allocate(&context_, kDefaultTensorAlignment, tensor.bytes, - &allocs_and_refcounts_[tensor_index].alloc)); - } - } - // Then the temporaries, in two passes. First allocate them all, them - // deallocate them. - TfLiteIntArray* node_temporaries = node.temporaries; - for (int i = 0; i < node_temporaries->size; ++i) { - int tensor_index = node_temporaries->data[i]; - TfLiteTensor& tensor = context_.tensors[tensor_index]; - if (tensor.allocation_type == kTfLiteArenaRw) { - TF_LITE_ENSURE_OK( - &context_, - arena_.Allocate(&context_, kDefaultTensorAlignment, tensor.bytes, - &allocs_and_refcounts_[tensor_index].alloc)); - } - } - for (int i = 0; i < node_temporaries->size; ++i) { - int tensor_index = node_temporaries->data[i]; - TfLiteTensor& tensor = context_.tensors[tensor_index]; - allocs_and_refcounts_[tensor_index].count--; - if (tensor.allocation_type == kTfLiteArenaRw && - allocs_and_refcounts_[tensor_index].count == 0) { - TF_LITE_ENSURE_OK( - &context_, - arena_.Deallocate(&context_, - allocs_and_refcounts_[tensor_index].alloc)); - } - } - - // Then process the node's inputs. - TfLiteIntArray* node_inputs = node.inputs; - for (int i = 0; i < node_inputs->size; ++i) { - int tensor_index = node_inputs->data[i]; - if (tensor_index == kOptionalTensor) { - continue; - } - TfLiteTensor& tensor = context_.tensors[tensor_index]; - - // Decrease reference count and deallocate if not needed anymore. - allocs_and_refcounts_[tensor_index].count--; - if (tensor.allocation_type == kTfLiteArenaRw && - allocs_and_refcounts_[tensor_index].count == 0) { - TF_LITE_ENSURE_OK( - &context_, - arena_.Deallocate(&context_, - allocs_and_refcounts_[tensor_index].alloc)); - } - } - } - - // Resize the buffer and commit the arena. - TF_LITE_ENSURE_OK(&context_, arena_.Commit(&context_)); - TF_LITE_ENSURE_OK(&context_, persistent_arena_.Commit(&context_)); - - // Rewire the tensors to use the underlying arena buffer. 
- for (int i = 0; i < context_.tensors_size; ++i) { - TfLiteTensor& tensor = context_.tensors[i]; - if (tensor.allocation_type == kTfLiteArenaRw) { - TF_LITE_ENSURE_OK( - &context_, - arena_.ResolveAlloc(&context_, allocs_and_refcounts_[i].alloc, - &tensor.data.raw)); - } - if (tensor.allocation_type == kTfLiteArenaRwPersistent) { - TF_LITE_ENSURE_OK( - &context_, - persistent_arena_.ResolveAlloc( - &context_, allocs_and_refcounts_[i].alloc, &tensor.data.raw)); - } - } - - invokable_ = true; - next_allocate_node_id_ = new_next_allocate_node_id; - return kTfLiteOk; -} - namespace { TfLiteIntArray* convertVectorToTfLiteIntArray(const std::vector<int>& x) { TfLiteIntArray* lite = TfLiteIntArrayCreate(x.size()); @@ -312,11 +160,19 @@ TfLiteIntArray* convertVectorToTfLiteIntArray(const std::vector<int>& x) { } // namespace TfLiteStatus Interpreter::AllocateTensors() { - next_allocate_node_id_ = 0; - TF_LITE_ENSURE_OK(&context_, arena_.Clear()); - TF_LITE_ENSURE_OK(&context_, persistent_arena_.Clear()); - allocs_and_refcounts_.clear(); - return AllocateTensorsWhoseSizesAreKnown(); + next_node_to_prepare_ = 0; + if (memory_planner_) { + TF_LITE_ENSURE_STATUS(memory_planner_->ResetAllocations()); + } + + if (!consistent_) { + ReportError(&context_, "AllocateTensors() called on inconsistent model."); + return kTfLiteError; + } + + TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors()); + invokable_ = true; + return kTfLiteOk; } TfLiteStatus Interpreter::AddNodeWithParameters( @@ -372,6 +228,57 @@ TfLiteStatus Interpreter::ResizeInputTensor(int tensor_index, return ResizeTensorImpl(&context_.tensors[tensor_index], dims_lite); } +// Returns true if at least one tensor in the given list is kTfLiteDynamic. +bool HasDynamicTensor(const TfLiteContext& context, + const TfLiteIntArray* tensors) { + for (int i = 0; i < tensors->size; ++i) { + const TfLiteTensor& tensor = context.tensors[tensors->data[i]]; + if (tensor.allocation_type == kTfLiteDynamic) { + return true; + } + } + return false; +} + +TfLiteStatus Interpreter::PrepareOpsStartingAt(int first_node, + int* last_node_prepared) { + for (int i = first_node; i < nodes_and_registration_.size(); i++) { + TfLiteNode& node = nodes_and_registration_[i].first; + const TfLiteRegistration& registration = nodes_and_registration_[i].second; + if (OpPrepare(registration, &node) == kTfLiteError) { + return kTfLiteError; + } + + *last_node_prepared = i; + + // Discontinue if the node has dynamic outputs. Note that we don't + // stop for dynamic temporary tensors since they won't affect the + // sizes of other tensors in the graph. 
+ if (HasDynamicTensor(context_, node.outputs)) { + break; + } + } + return kTfLiteOk; +} + +TfLiteStatus Interpreter::PrepareOpsAndTensors() { + if (!memory_planner_) { + memory_planner_.reset(new ArenaPlanner( + &context_, std::unique_ptr<GraphInfo>(new InterpreterInfo(this)))); + memory_planner_->PlanAllocations(); + } + + int last_node_prepared = 0; + + TF_LITE_ENSURE_STATUS( + PrepareOpsStartingAt(next_node_to_prepare_, &last_node_prepared)); + TF_LITE_ENSURE_STATUS(memory_planner_->ExecuteAllocations( + next_node_to_prepare_, last_node_prepared)); + + next_node_to_prepare_ = last_node_prepared + 1; + return kTfLiteOk; +} + TfLiteStatus Interpreter::Invoke() { if (!consistent_) { ReportError(&context_, "Invoke called on model that is not consistent."); @@ -384,10 +291,8 @@ TfLiteStatus Interpreter::Invoke() { TfLiteStatus status = kTfLiteOk; if (nnapi_delegate_) { - if (AllocateTensorsWhoseSizesAreKnown() == kTfLiteError) { - return kTfLiteError; - } - if (next_allocate_node_id_ == nodes_and_registration_.size()) { + TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors()); + if (next_node_to_prepare_ == nodes_and_registration_.size()) { TF_LITE_ENSURE_OK(&context_, nnapi_delegate_->Invoke(this)); return kTfLiteOk; } else { @@ -400,14 +305,17 @@ TfLiteStatus Interpreter::Invoke() { } } + // Invocations are always done in node order. + // Note that calling Invoke repeatedly will cause the original memory plan to + // be reused, unless either ResizeInputTensor() or AllocateTensors() has been + // called. + // TODO(b/71913981): we should force recalculation in the presence of dynamic + // tensors, because they may have new value which in turn may affect shapes + // and allocations. for (int i = 0; i < nodes_and_registration_.size(); i++) { - // Ensure we have allocated up to this node. The point of this is to - // allocate as much as possible before running any evaluation, but - // dynamic shapes can prevent this from being possible. - if (i >= next_allocate_node_id_) { - if (AllocateTensorsWhoseSizesAreKnown() == kTfLiteError) { - return kTfLiteError; - } + if (i == next_node_to_prepare_) { + TF_LITE_ENSURE_STATUS(PrepareOpsAndTensors()); + TF_LITE_ENSURE(&context_, next_node_to_prepare_ >= i); } TfLiteNode& node = nodes_and_registration_[i].first; const TfLiteRegistration& registration = nodes_and_registration_[i].second; diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index 65c61e44be..38dd402e8a 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -23,7 +23,7 @@ limitations under the License. #include "tensorflow/contrib/lite/allocation.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/error_reporter.h" -#include "tensorflow/contrib/lite/simple_memory_arena.h" +#include "tensorflow/contrib/lite/memory_planner.h" namespace tflite { @@ -49,13 +49,6 @@ constexpr TfLiteType typeToTfLiteType<unsigned char>() { return kTfLiteUInt8; } -struct ArenaAllocRefCount { - ArenaAllocRefCount() : alloc(), count(0) {} - - ArenaAlloc alloc; - int count; -}; - // Forward declare since NNAPIDelegate uses Interpreter. class NNAPIDelegate; @@ -276,9 +269,17 @@ class Interpreter { return op_reg.invoke(&context_, node); } - // Allocate tensors whose sizes are known in order of nodes. Discontinue when - // we encounter a node that has a dynamic output tensor. - TfLiteStatus AllocateTensorsWhoseSizesAreKnown(); + // Call OpPrepare() for as many ops as possible, allocating memory for their + // tensors. 
If an op containing dynamic tensors is found, preparation will be + // postponed until this function is called again. This allows the interpreter + // to wait until Invoke() to resolve the sizes of dynamic tensors. + TfLiteStatus PrepareOpsAndTensors(); + + // Call OpPrepare() for all ops starting at 'first_node'. Stop when a + // dynamic tensors is found or all ops have been prepared. Fill + // 'last_node_prepared' with the id of the op containing dynamic tensors, or + // the last in the graph. + TfLiteStatus PrepareOpsStartingAt(int first_node, int* last_node_prepared); // Tensors needed by the interpreter. Use `AddTensors` to add more blank // tensor entries. Note, `tensors_.data()` needs to be synchronized to the @@ -325,17 +326,6 @@ class Interpreter { std::vector<std::pair<TfLiteNode, TfLiteRegistration>> nodes_and_registration_; - // Raw memory buffer that is allocated for all temporary and graph outputs. - // that are declared kTfLiteArenaRw. - SimpleMemoryArena arena_; - - // Raw memory buffer that is allocated for persistent tensors that are - // declared as kTfLiteArenaRwPersistent. - SimpleMemoryArena persistent_arena_; - - // Stores allocation and reference counts of all tensors. - std::vector<ArenaAllocRefCount> allocs_and_refcounts_; - // Whether the model is consistent. That is to say if the inputs and outputs // of every node and the global inputs and outputs are valid indexes into // the tensor array. @@ -356,7 +346,7 @@ class Interpreter { // The error reporter delegate that tflite will forward queries errors to. ErrorReporter* error_reporter_; - // Next node to allocate output tensors. + // Index of the next node to prepare. // During Invoke(), Interpreter will allocate input tensors first, which are // known to be fixed size. Then it will allocate outputs from nodes as many // as possible. When there is a node that produces dynamic sized tensor. @@ -364,10 +354,12 @@ class Interpreter { // node id, and execute the node to generate the output tensor before continue // to allocate successors. This process repeats until all nodes are executed. // NOTE: this relies on the order of nodes that is in topological order. - int next_allocate_node_id_; + int next_node_to_prepare_; // Whether to delegate to NN API std::unique_ptr<NNAPIDelegate> nnapi_delegate_; + + std::unique_ptr<MemoryPlanner> memory_planner_; }; } // namespace tflite diff --git a/tensorflow/contrib/lite/memory_planner.h b/tensorflow/contrib/lite/memory_planner.h new file mode 100644 index 0000000000..b11d86c375 --- /dev/null +++ b/tensorflow/contrib/lite/memory_planner.h @@ -0,0 +1,45 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MEMORY_PLANNER_H_ +#define THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MEMORY_PLANNER_H_ + +#include "tensorflow/contrib/lite/context.h" + +namespace tflite { + +// A MemoryPlanner is responsible for planning and executing a number of +// memory-related operations that are necessary in TF Lite. +class MemoryPlanner { + public: + virtual ~MemoryPlanner() {} + + // Plans the necessary memory allocations. This is the MemoryPlanner's + // pre-processing step and is called when the graph structure is known but + // actual size of the tensors is not. + virtual TfLiteStatus PlanAllocations() = 0; + + // Allocates the necessary memory to execute all nodes in the interval + // [first_node, last_node]. + virtual TfLiteStatus ExecuteAllocations(int first_node, int last_node) = 0; + + // Invalidates allocations made earliers. This is called when tensors sizes + // have change. All planned allocations remain, but can't be used until + // ExecuteAllocations() is called. + virtual TfLiteStatus ResetAllocations() = 0; +}; + +} // namespace tflite + +#endif // THIRD_PARTY_TENSORFLOW_CONTRIB_LITE_MEMORY_PLANNER_H_ diff --git a/tensorflow/contrib/lite/simple_memory_arena.h b/tensorflow/contrib/lite/simple_memory_arena.h index 0d0b7f9ff7..07a38c4243 100644 --- a/tensorflow/contrib/lite/simple_memory_arena.h +++ b/tensorflow/contrib/lite/simple_memory_arena.h @@ -68,6 +68,10 @@ class SimpleMemoryArena { TfLiteStatus Clear(); + int64_t BasePointer() const { + return reinterpret_cast<int64_t>(underlying_buffer_aligned_ptr_); + } + private: bool commited_; size_t arena_alignment_; |
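One detail worth calling out from arena_planner_test.cc above: tensor sizes in the test graph are (index + 1) * 3 bytes, and each arena allocation is rounded up to kDefaultTensorAlignment (4 bytes). That is why, in the stepwise-allocation test, deallocating tensor #6 (21 bytes) frees a 24-byte slot that tensor #7 (24 bytes) can reuse exactly. A small self-contained sketch of that rounding follows; the helper name is illustrative and not part of the commit.

```cpp
#include <cstdint>
#include <iostream>

// Round an allocation size up to the next multiple of `alignment`, mirroring
// the 4-byte tensor alignment the arena planner applies to each allocation.
int64_t AlignedSize(int64_t bytes, int64_t alignment) {
  const int64_t remainder = bytes % alignment;
  return remainder == 0 ? bytes : bytes + (alignment - remainder);
}

int main() {
  std::cout << AlignedSize(21, 4) << "\n";  // tensor #6: 21 bytes -> 24-byte slot
  std::cout << AlignedSize(24, 4) << "\n";  // tensor #7: 24 bytes -> fits the freed slot
  return 0;
}
```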