aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/stream_executor/scratch_allocator.h
diff options
context:
space:
mode:
authorGravatar Vijay Vasudevan <vrv@google.com>2015-12-08 09:58:59 -0800
committerGravatar Vijay Vasudevan <vrv@google.com>2015-12-08 09:58:59 -0800
commitddd4aaf5286de24ba70402ee0ec8b836d3aed8c7 (patch)
tree4efdf6cf4d69b45041fd2a02cd2b7327ea9f1f58 /tensorflow/stream_executor/scratch_allocator.h
parentcd53f3c3302c9312c1840389a9988a879b8b9dd5 (diff)
TensorFlow: upstream changes to git.
Change 109695551 Update FAQ Change 109694725 Add a gradient for resize_bilinear op. Change 109694505 Don't mention variables module in docs variables.Variable should be tf.Variable. Change 109658848 Adding an option to create a new thread-pool for each session. Change 109640570 Take the snapshot of stream-executor. + Expose an interface for scratch space allocation in the interface. Change 109638559 Let image_summary accept uint8 input This allows users to do their own normalization / scaling if the default (very weird) behavior of image_summary is undesired. This required a slight tweak to fake_input.cc to make polymorphically typed fake inputs infer if their type attr is not set but has a default. Unfortunately, adding a second valid type to image_summary *disables* automatic implicit conversion from np.float64 to tf.float32, so this change is slightly backwards incompatible. Change 109636969 Add serialization operations for SparseTensor. Change 109636644 Update generated Op docs. Change 109634899 TensorFlow: add a markdown file for producing release notes for our releases. Seed with 0.5.0 with a boring but accurate description. Change 109634502 Let histogram_summary take any realnumbertype It used to take only floats, now it understands ints. Change 109634434 TensorFlow: update locations where we mention python 3 support, update them to current truth. Change 109632108 Move HSV <> RGB conversions, grayscale conversions, and adjust_* ops back to tensorflow - make GPU-capable version of RGBToHSV and HSVToRGB, allows only float input/output - change docs to reflect new size constraints - change HSV format to be [0,1] for all components - add automatic dtype conversion for all adjust_* and grayscale conversion ops - fix up docs Change 109631077 Improve optimizer exceptions 1. grads_and_vars is now a tuple, so must be wrapped when passed to format. 2. Use '%r' instead of '%s' for dtype formatting Base CL: 109697989
Diffstat (limited to 'tensorflow/stream_executor/scratch_allocator.h')
-rw-r--r--tensorflow/stream_executor/scratch_allocator.h83
1 files changed, 83 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/scratch_allocator.h b/tensorflow/stream_executor/scratch_allocator.h
new file mode 100644
index 0000000000..52697d6f8e
--- /dev/null
+++ b/tensorflow/stream_executor/scratch_allocator.h
@@ -0,0 +1,83 @@
+/* Copyright 2015 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_STREAM_EXECUTOR_SCRATCH_ALLOCATOR_H_
+#define TENSORFLOW_STREAM_EXECUTOR_SCRATCH_ALLOCATOR_H_
+
+#include <memory>
+
+#include "tensorflow/stream_executor/device_memory.h"
+#include "tensorflow/stream_executor/lib/statusor.h"
+#include "tensorflow/stream_executor/platform/port.h"
+#include "tensorflow/stream_executor/temporary_device_memory.h"
+
+namespace perftools {
+namespace gputools {
+
+class Stream;
+
+// Interface that lets stream operations (e.g.
+// Stream::ThenConvolveWithScratch) optionally request that scratch space be
+// allocated in order to speed up the operation being enqueued.
+//
+// Note that the caller is responsible for deallocating the scratch space at a
+// known-safe point, when all scratch-memory-consuming kernels are known for
+// sure to have finished; e.g. at stream synchronization time. This is different
+// from a traditional C++ object allocator, where the client is responsible for
+// releasing. (Conceptually, scratch memory is a form of "temporary" device
+// memory allocation.)
+class ScratchAllocator {
+ public:
+ virtual ~ScratchAllocator();
+
+ // Returns the limit, in bytes, on the amount of memory this scratch
+ // allocator wants to produce. It may be used to help select an algorithm.
+ //
+ // A return value < 0 indicates that there is no recommended limit.
+ virtual int64 GetMemoryLimitInBytes(Stream* stream) = 0;
+
+ // Returns an allocation of byte_size bytes for use in an operation on stream.
+ //
+ // This is a temporary allocation, and the caller is responsible for
+ // deallocating at some known-safe point. See the class comment above.
+ virtual port::StatusOr<DeviceMemory<uint8>> AllocateBytes(
+ Stream* stream, int64 byte_size) = 0;
+};
+
+// Makes a single temporary memory allocation -- this memory is deallocated at
+// the next stream synchronization point after this object has gone out of
+// scope. This satisfies the lifetime and deallocation properties given in the
+// ScratchAllocator class comment.
+//
+// Thread-compatible, but not thread-safe (use in scenarios where only one
+// thread will request the scratch allocation).
+class OneTimeScratchAllocator : public ScratchAllocator {
+ public:
+ OneTimeScratchAllocator();
+ ~OneTimeScratchAllocator() override;
+ int64 GetMemoryLimitInBytes(Stream* stream) override;
+ port::StatusOr<DeviceMemory<uint8>> AllocateBytes(Stream* stream,
+ int64 byte_size) override;
+
+ private:
+ std::unique_ptr<TemporaryDeviceMemory<uint8>> temporary_;
+
+ SE_DISALLOW_COPY_AND_ASSIGN(OneTimeScratchAllocator);
+};
+
+} // namespace gputools
+} // namespace perftools
+
+#endif // TENSORFLOW_STREAM_EXECUTOR_SCRATCH_ALLOCATOR_H_