/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/contrib/lite/arena_planner.h"
#include <utility>

namespace tflite {

struct AllocationInfo {
  // The node index requesting this allocation.
  int node;
  // The tensor index to be allocated or deallocated.
  int tensor;
  // Whether to allocate or deallocate the tensor.
  enum Type { ALLOC, DEALLOC } type;
};
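// As a hypothetical illustration: for a single-node graph whose only input is
// tensor 0 and whose only output is tensor 1, with preserve_inputs and
// preserve_intermediates both false, PlanAllocations() below would produce
// the queue
//   {0, 0, ALLOC}, {0, 1, ALLOC}, {0, 0, DEALLOC}
// Tensor 1 is a graph output, so it is never queued for deallocation.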

ArenaPlanner::ArenaPlanner(TfLiteContext* context,
                           std::unique_ptr<GraphInfo> graph_info,
                           bool preserve_inputs, bool preserve_intermediates,
                           int tensor_alignment)
    : context_(context),
      graph_info_(std::move(graph_info)),
      arena_(kDefaultArenaAlignment),
      persistent_arena_(kDefaultArenaAlignment),
      preserve_inputs_(preserve_inputs),
      preserve_intermediates_(preserve_intermediates),
      tensor_alignment_(tensor_alignment) {}

ArenaPlanner::~ArenaPlanner() {}
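
// A minimal usage sketch (hypothetical; the real driver is the interpreter,
// and `graph_info` / `num_nodes` are placeholder names, not defined here):
//
//   ArenaPlanner planner(context, std::move(graph_info),
//                        /*preserve_inputs=*/true,
//                        /*preserve_intermediates=*/false,
//                        /*tensor_alignment=*/4);
//   TF_LITE_ENSURE_STATUS(planner.PlanAllocations());
//   // Allocate arena space and resolve tensor data pointers for all nodes.
//   TF_LITE_ENSURE_STATUS(planner.ExecuteAllocations(0, num_nodes - 1));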

int64_t ArenaPlanner::BasePointer(TfLiteAllocationType type) {
  if (type == kTfLiteArenaRwPersistent) {
    return persistent_arena_.BasePointer();
  }
  if (type == kTfLiteArenaRw) {
    return arena_.BasePointer();
  }
  return 0;
}

TfLiteStatus ArenaPlanner::ResetAllocations() {
  TF_LITE_ENSURE_STATUS(arena_.Clear());
  TF_LITE_ENSURE_STATUS(persistent_arena_.Clear());
  allocs_.clear();
  allocs_.resize(graph_info_->num_tensors());
  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::PlanAllocations() {
  // Invalidate any existing data.
  TF_LITE_ENSURE_STATUS(ResetAllocations());

  // Keeps track of references to each tensor.
  std::vector<int> refcounts(graph_info_->num_tensors(), 0);
  // `allocated` and `deallocated` are technically lists of boolean values.
  // We use `vector<int>` instead of `vector<bool>` to keep the compiled
  // binary size smaller.
  std::vector<int> allocated(graph_info_->num_tensors(), false);
  std::vector<int> deallocated(graph_info_->num_tensors(), false);

  auto allocate = [this, &allocated, &deallocated](int node,
                                                   int tensor) -> TfLiteStatus {
    if (allocated[tensor]) {
      return kTfLiteOk;
    }
    TF_LITE_ENSURE(context_, !deallocated[tensor]);
    alloc_queue_.push_back({node, tensor, AllocationInfo::ALLOC});
    allocated[tensor] = true;
    return kTfLiteOk;
  };

  auto deallocate = [this, &allocated, &deallocated](
                        int node, int tensor) -> TfLiteStatus {
    if (!allocated[tensor]) {
      // Do not enqueue a DEALLOC if the tensor was never allocated.
      // This happens with constant tensors.
      return kTfLiteOk;
    }
    TF_LITE_ENSURE(context_, !deallocated[tensor]);
    alloc_queue_.push_back({node, tensor, AllocationInfo::DEALLOC});
    return kTfLiteOk;
  };

  // There will be an entry in alloc_queue_ for the allocation of each tensor
  // and another for its deallocation.
  alloc_queue_.reserve(2 * graph_info_->num_tensors());

  // We must make sure the output tensors are never overwritten. We do that by
  // artificially adding one to their ref-counts so they are never selected
  // for deallocation.
  for (int tensor_index : graph_info_->outputs()) {
    refcounts[tensor_index]++;
  }
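  // For example (hypothetical), if tensor 5 is both a graph output and an
  // input of the last node, its ref-count ends up at 2 after the input
  // counting below and only drops back to 1 during the execution pass, so no
  // DEALLOC entry is ever queued for it.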

  // Variable tensors are also never overwritten and need to be alive all the
  // time.
  for (int tensor_index : graph_info_->variables()) {
    refcounts[tensor_index]++;
  }

  // Queue all graph inputs for allocation. If preserve_inputs_ is true, make
  // sure they are never overwritten.
  for (int tensor_index : graph_info_->inputs()) {
    if (tensor_index != kOptionalTensor) {
      if (preserve_inputs_) {
        refcounts[tensor_index]++;
      }
      TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
    }
  }

  // Queue all graph variable tensors for allocation.
  for (int tensor_index : graph_info_->variables()) {
    if (tensor_index != kOptionalTensor) {
      // Variable tensors already had their ref-count incremented above, so
      // they will never be deallocated; here we only queue their allocation.
      TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
    }
  }

  // Count references to node input tensors.
  for (int i = 0; i < graph_info_->num_nodes(); ++i) {
    const TfLiteNode& node = graph_info_->node(i);
    TfLiteIntArray* node_inputs = node.inputs;
    for (int j = 0; j < node_inputs->size; ++j) {
      int tensor_index = node_inputs->data[j];
      if (tensor_index != kOptionalTensor) {
        refcounts[tensor_index]++;
      }
    }
  }

  // Queue all graph inputs for allocation. This is a no-op for inputs that
  // were already queued above, since allocate() skips tensors that are
  // already allocated.
  for (int tensor_index : graph_info_->inputs()) {
    if (tensor_index != kOptionalTensor) {
      TF_LITE_ENSURE_STATUS(allocate(0, tensor_index));
    }
  }
  // Go through the graph in execution order.
  for (int i = 0; i < graph_info_->num_nodes(); ++i) {
    const TfLiteNode& node = graph_info_->node(i);

    // First queue output tensors for allocation.
    TfLiteIntArray* node_outputs = node.outputs;
    for (int j = 0; j < node_outputs->size; ++j) {
      int tensor_index = node_outputs->data[j];
      TF_LITE_ENSURE_STATUS(allocate(i, tensor_index));
    }

    // Then update the ref-counts of the node's inputs, and if necessary queue
    // them for deallocation.
    if (!preserve_intermediates_) {
      TfLiteIntArray* node_inputs = node.inputs;
      for (int j = 0; j < node_inputs->size; ++j) {
        int tensor_index = node_inputs->data[j];
        if (tensor_index != kOptionalTensor) {
          refcounts[tensor_index]--;
          if (refcounts[tensor_index] == 0) {
            TF_LITE_ENSURE_STATUS(deallocate(i, tensor_index));
          }
        }
      }
    }
  }

  // Note that graph outputs will never be scheduled for deallocation. We
  // could do that here for completeness, but it won't have any effect.
  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::ExecuteAllocations(int first_node, int last_node) {
  // Grow the size of `allocs_` if necessary. This allows allocating temporary
  // tensors in an op's `prepare` function.
  TF_LITE_ENSURE(context_, graph_info_->num_tensors() >= allocs_.size());
  allocs_.resize(graph_info_->num_tensors());

  TF_LITE_ENSURE_STATUS(CalculateAllocations(first_node, last_node));
  TF_LITE_ENSURE_STATUS(Commit());

  for (int i = 0; i < graph_info_->num_tensors(); ++i) {
    // TODO(ahentz): we could do this only for the tensors that were modified
    // in CalculateAllocations(), instead of redoing it for tensors that
    // already had proper pointers. However we must be very careful, because
    // SimpleMemoryArena::Commit() could move the base pointer.
    TF_LITE_ENSURE_STATUS(ResolveTensorAllocation(i));
  }

  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::Commit() {
  TF_LITE_ENSURE_STATUS(arena_.Commit(context_));
  TF_LITE_ENSURE_STATUS(persistent_arena_.Commit(context_));
  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::CalculateAllocations(int first_node, int last_node) {
  int active_node = first_node;
  // When dynamic tensors are present this method is called multiple times.
  // The items in the alloc_queue_ referring to nodes before first_node were
  // processed previously and should be skipped. Entries after last_node are
  // not yet ready to be handled.
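  // As a hypothetical example: if first_node == 0 and the queue holds entries
  // for nodes 0 and 1, node 0's temporaries are allocated when its first
  // entry is seen, then freed (and node 1's allocated) at node 1's first
  // entry, and the call after the loop frees node 1's temporaries.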
  for (const auto& alloc_info : alloc_queue_) {
    if (alloc_info.node < first_node) continue;
    if (alloc_info.node > last_node) break;
    if (alloc_info.node == active_node) {
      // This is the first allocation/deallocation for a given node.  It is
      // time to deallocate the previous temporaries and allocate new ones.
      if (active_node != first_node) {
        TF_LITE_ENSURE_STATUS(
            CalculateDeallocationOfInternalTensors(active_node - 1));
      }
      TF_LITE_ENSURE_STATUS(CalculateAllocationOfInternalTensors(active_node));
      ++active_node;
    }
    // Handle the current item.
    if (alloc_info.type == AllocationInfo::ALLOC) {
      TF_LITE_ENSURE_STATUS(CalculateTensorAllocation(alloc_info.tensor));
    } else {
      TF_LITE_ENSURE_STATUS(CalculateTensorDeallocation(alloc_info.tensor));
    }
  }

  // Don't forget to deallocate the temporaries of the last node.
  TF_LITE_ENSURE_STATUS(
      CalculateDeallocationOfInternalTensors(active_node - 1));

  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::ResolveTensorAllocation(int tensor_index) {
  TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
  if (tensor.allocation_type == kTfLiteArenaRw) {
    // Skip resolution if the size of the tensor is zero, leaving its data
    // pointer as nullptr.
    if (allocs_[tensor_index].size != 0) {
      TF_LITE_ENSURE_STATUS(arena_.ResolveAlloc(context_, allocs_[tensor_index],
                                                &tensor.data.raw));
    }
  }
  if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
    TF_LITE_ENSURE_STATUS(persistent_arena_.ResolveAlloc(
        context_, allocs_[tensor_index], &tensor.data.raw));
  }
  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::CalculateTensorAllocation(int tensor_index) {
  TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
  if (tensor.allocation_type == kTfLiteArenaRw) {
    TF_LITE_ENSURE_STATUS(arena_.Allocate(
        context_, tensor_alignment_, tensor.bytes, &allocs_[tensor_index]));
  }
  if (tensor.allocation_type == kTfLiteArenaRwPersistent) {
    TF_LITE_ENSURE_STATUS(persistent_arena_.Allocate(
        context_, tensor_alignment_, tensor.bytes, &allocs_[tensor_index]));
  }
  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::CalculateTensorDeallocation(int tensor_index) {
  TfLiteTensor& tensor = *graph_info_->tensor(tensor_index);
  if (tensor.allocation_type == kTfLiteArenaRw) {
    TF_LITE_ENSURE_STATUS(arena_.Deallocate(context_, allocs_[tensor_index]));
  }
  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::CalculateAllocationOfInternalTensors(
    int node_index) {
  if (node_index < graph_info_->num_nodes()) {
    const TfLiteNode& node = graph_info_->node(node_index);
    TfLiteIntArray* node_temporaries = node.temporaries;
    for (int i = 0; i < node_temporaries->size; ++i) {
      int tensor_index = node_temporaries->data[i];
      TF_LITE_ENSURE_STATUS(CalculateTensorAllocation(tensor_index));
    }
  }
  return kTfLiteOk;
}

TfLiteStatus ArenaPlanner::CalculateDeallocationOfInternalTensors(
    int node_index) {
  if (node_index < graph_info_->num_nodes()) {
    const TfLiteNode& node = graph_info_->node(node_index);
    TfLiteIntArray* node_temporaries = node.temporaries;
    for (int i = 0; i < node_temporaries->size; ++i) {
      int tensor_index = node_temporaries->data[i];
      TF_LITE_ENSURE_STATUS(CalculateTensorDeallocation(tensor_index));
    }
  }
  return kTfLiteOk;
}

}  // namespace tflite