diff options
Diffstat (limited to 'tensorflow/contrib/lite/toco/allocate_transient_arrays.cc')
-rw-r--r-- | tensorflow/contrib/lite/toco/allocate_transient_arrays.cc | 318 |
1 files changed, 318 insertions, 0 deletions
diff --git a/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc b/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc new file mode 100644 index 0000000000..2f4454d7c8 --- /dev/null +++ b/tensorflow/contrib/lite/toco/allocate_transient_arrays.cc @@ -0,0 +1,318 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include <algorithm> +#include <memory> +#include <set> +#include <string> +#include <unordered_map> +#include <utility> +#include <vector> + +#include "tensorflow/contrib/lite/toco/allocate_transient_arrays.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/model_flags.pb.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { +namespace { + +// The life span of an array. +struct ArrayLifespan { + // If true, the array is persistent state (as in a RNN). In that case, + // its allocation is permanent and the first_op, last_op members are + // unused. (The term 'transient' is a misnomer and we should think in + // terms of 'workspace' instead). + bool persistent = false; + // Index of the first op addressing that array. The array must be allocated + // just before executing this op. + std::size_t first_op = 0; + // Index of the last op addressing that array. We want to deallocate the array + // immediately after executing this op. + std::size_t last_op = 0; +}; + +bool StartsAt(const ArrayLifespan& lifespan, std::size_t op_index) { + return !lifespan.persistent && lifespan.first_op == op_index; +} + +bool EndsAt(const ArrayLifespan& lifespan, std::size_t op_index) { + return !lifespan.persistent && lifespan.last_op == op_index; +} + +// Helper function for ComputeArrayLifespans: updates one ArrayLifespan for +// one array for one op. +void UpdateArrayLifespan( + const string& array_name, std::size_t op_index, + std::unordered_map<string, ArrayLifespan>* array_lifespans) { + if (array_lifespans->count(array_name)) { + auto& lifespan = array_lifespans->at(array_name); + if (!lifespan.persistent) { + lifespan.first_op = std::min(lifespan.first_op, op_index); + lifespan.last_op = std::max(lifespan.last_op, op_index); + } + } else { + ArrayLifespan lifespan; + lifespan.first_op = op_index; + lifespan.last_op = op_index; + (*array_lifespans)[array_name] = lifespan; + } +} + +// Computes the ArrayLifespan for each array. +void ComputeArrayLifespans( + const Model& model, + std::unordered_map<string, ArrayLifespan>* array_lifespans) { + CHECK(array_lifespans->empty()); + for (const auto& rnn_state : model.flags.rnn_states()) { + ArrayLifespan lifespan; + lifespan.persistent = true; + (*array_lifespans)[rnn_state.state_array()] = lifespan; + } + for (std::size_t op_index = 0; op_index < model.operators.size(); + op_index++) { + const auto& op = model.operators[op_index]; + for (const auto& input : op->inputs) { + UpdateArrayLifespan(input, op_index, array_lifespans); + } + for (const auto& output : op->outputs) { + UpdateArrayLifespan(output, op_index, array_lifespans); + } + } +} + +inline bool operator==(const Alloc& a, const Alloc& b) { + CHECK(a.start != b.start || a.end == b.end); + return a.start == b.start; +} + +// Helper to keep track of total allocation size and of currently live +// allocations, and containing the core allocation routine. +class Allocator { + public: + Allocator() : total_size_(0) {} + + // Core allocation routine. + void Allocate(std::size_t size, Alloc* result) { + // Naive algorithm: pick the first gap between live allocations, + // that is wide enough for the new array. + std::size_t pos = 0; + for (const auto& a : live_allocs_) { + if (a.start >= pos + size) { + result->start = pos; + result->end = pos + size; + live_allocs_.insert(*result); + return; + } + pos = a.end; + } + // No sufficiently wide gap was found before an existing live allocation, + // so we allocate the new array at the end of the allocation space. + // We may then have to grow total_size_. + total_size_ = std::max(total_size_, pos + size); + result->start = pos; + result->end = pos + size; + live_allocs_.insert(*result); + } + + void Deallocate(const Alloc& a) { + auto iter = std::lower_bound(live_allocs_.begin(), live_allocs_.end(), a); + CHECK(iter != live_allocs_.end()); + CHECK(*iter == a); + live_allocs_.erase(iter); + } + + std::size_t total_size() const { return total_size_; } + + private: + std::size_t total_size_; + std::set<Alloc> live_allocs_; +}; + +// Returns the required transient allocation size (in bytes) for a given array, +// or 0 if it's not a transient array. +std::size_t TransientArraySize(const Model& model, const string& array_name, + std::size_t transient_data_alignment) { + if (!IsAllocatableTransientArray(model, array_name)) { + return 0; + } + const auto& array = model.arrays.at(array_name); + CHECK(array->has_shape()) + << "Array '" << array_name << "' doesn't have a shape"; + if (array->data_type == ArrayDataType::kNone) { + // Catch a typical issue at the moment with RNN states + for (const auto& rnn_state : model.flags.rnn_states()) { + if (rnn_state.state_array() == array_name) { + LOG(FATAL) + << "A RNN state array, " << array_name << ", still does not " + << "have a known data type after all graph transformations have " + << "run. That's mostly a toco bug --- sorry. For now, you can " + << "work around this issue by adding manually_create:true in the " + << "--rnn_state description of this RNN state."; + } + } + LOG(FATAL) << "An array, " << array_name << ", still does not " + << "have a known data type after all graph transformations have " + << "run."; + } + const std::size_t elem_size = ElementSize(array->data_type); + const std::size_t raw_size = + elem_size * RequiredBufferSizeForShape(array->shape()); + const std::size_t rounded_size = + RoundUpToNextMultipleOf(raw_size, transient_data_alignment); + return rounded_size; +} + +// Allocates an array: call this for every array just before the first +// op where it is used. +void AllocateTransientArray(const Model& model, const string& array_name, + Allocator* allocator, + std::size_t transient_data_alignment) { + if (!IsAllocatableTransientArray(model, array_name)) { + return; + } + const std::size_t size = + TransientArraySize(model, array_name, transient_data_alignment); + const auto& array = model.arrays.at(array_name); + CHECK(!array->alloc); + allocator->Allocate(size, &array->GetOrCreateAlloc()); +} + +// Deallocates an array: call this for every array just after the last +// op where it is used. +void DeallocateTransientArray(const Model& model, const string& array_name, + Allocator* allocator) { + if (!IsAllocatableTransientArray(model, array_name)) { + return; + } + const auto& array = model.arrays.at(array_name); + CHECK(!!array->alloc); + allocator->Deallocate(*array->alloc); +} + +} // namespace + +void AllocateTransientArrays(Model* model, + std::size_t transient_data_alignment) { + // Precompute the lifespans for all arrays. + std::unordered_map<string, ArrayLifespan> array_lifespans; + ComputeArrayLifespans(*model, &array_lifespans); + + // In case of variable batch, our convention will be to compute the + // allocations for batch==1, then let the inference code multiply all + // the offsets by the actual runtime batch size. Conveniently, + // the variable_batch and batch flags are mutually exclusive, and the default + // value of batch is 1, so we have nothing special to do here. Let us + // just guard this assumption with a CHECK: + bool batchless_input_shapes = true; + for (const auto& input_array : model->flags.input_arrays()) { + if (input_array.shape().empty() || input_array.shape(0) != 1) { + batchless_input_shapes = false; + break; + } + } + CHECK(!model->flags.variable_batch() || batchless_input_shapes); + + Allocator allocator; + + // Construct a sorted map of array names, so that other layout engines can + // match exactly. + std::map<string, const Array*> ordered_arrays_map; + for (const auto& pair : model->arrays) { + ordered_arrays_map[pair.first] = pair.second.get(); + } + + // Allocate persistent arrays (like RNN states). For them, 'transient' + // is a misnormer, should read 'workspace'. + for (const auto& array_pair : ordered_arrays_map) { + const string& array_name = array_pair.first; + const auto& array_lifespan = array_lifespans.find(array_name)->second; + if (array_lifespan.persistent) { + AllocateTransientArray(*model, array_name, &allocator, + transient_data_alignment); + } + } + + for (std::size_t op_index = 0; op_index < model->operators.size(); + op_index++) { + const auto& op = model->operators[op_index]; + // Allocate those arrays whose lifespan starts exactly here. + for (const auto& input : op->inputs) { + if (StartsAt(array_lifespans[input], op_index)) { + AllocateTransientArray(*model, input, &allocator, + transient_data_alignment); + } + } + for (const auto& output : op->outputs) { + if (StartsAt(array_lifespans[output], op_index)) { + AllocateTransientArray(*model, output, &allocator, + transient_data_alignment); + } + } + // Deallocate those arrays whose lifespan ends exactly here. + for (const auto& input : op->inputs) { + if (EndsAt(array_lifespans[input], op_index)) { + DeallocateTransientArray(*model, input, &allocator); + } + } + for (const auto& output : op->outputs) { + if (EndsAt(array_lifespans[output], op_index)) { + DeallocateTransientArray(*model, output, &allocator); + } + } + } + + // Just out of curiosity (not used in the actual allocation process) + // evaluate the optimal total allocated size. + // First, compute the size of persistent arrays. + std::size_t optimal_transient_alloc_size = 0; + std::size_t persistent_alloc_size = 0; + for (const auto& array_pair : ordered_arrays_map) { + const string& array_name = array_pair.first; + const auto& array_lifespan = array_lifespans.find(array_name)->second; + if (array_lifespan.persistent) { + persistent_alloc_size += + TransientArraySize(*model, array_name, transient_data_alignment); + } + } + for (const auto& op : model->operators) { + // for each operator, compute the sum of the sizes of the array that must + // be live during the execution of this operator, plus the size of + // persistent arrays that must be live at all times. + std::size_t size = persistent_alloc_size; + for (const auto& input : op->inputs) { + if (!array_lifespans[input].persistent) { + size += TransientArraySize(*model, input, transient_data_alignment); + } + } + for (const auto& output : op->outputs) { + if (!array_lifespans[output].persistent) { + size += TransientArraySize(*model, output, transient_data_alignment); + } + } + // The optimal total size is the maximum of all operator-specific sizes. + optimal_transient_alloc_size = std::max(optimal_transient_alloc_size, size); + } + + model->transient_data_size = allocator.total_size(); + model->transient_data_alignment = transient_data_alignment; + CHECK_GE(model->transient_data_size, optimal_transient_alloc_size); + LOG(INFO) << "Total transient array allocated size: " + << model->transient_data_size << " bytes, " + << "theoretical optimal value: " << optimal_transient_alloc_size + << " bytes."; + CheckInvariants(*model); +} +} // namespace toco |