diff options
author | Vijay Vasudevan <vrv@google.com> | 2015-12-08 09:58:59 -0800 |
---|---|---|
committer | Vijay Vasudevan <vrv@google.com> | 2015-12-08 09:58:59 -0800 |
commit | ddd4aaf5286de24ba70402ee0ec8b836d3aed8c7 (patch) | |
tree | 4efdf6cf4d69b45041fd2a02cd2b7327ea9f1f58 /tensorflow/stream_executor/scratch_allocator.h | |
parent | cd53f3c3302c9312c1840389a9988a879b8b9dd5 (diff) |
TensorFlow: upstream changes to git.
Change 109695551
Update FAQ
Change 109694725
Add a gradient for resize_bilinear op.
Change 109694505
Don't mention variables module in docs
variables.Variable should be tf.Variable.
Change 109658848
Adding an option to create a new thread-pool for each session.
Change 109640570
Take the snapshot of stream-executor.
+ Expose an interface for scratch space allocation in the interface.
Change 109638559
Let image_summary accept uint8 input
This allows users to do their own normalization / scaling if the default
(very weird) behavior of image_summary is undesired.
This required a slight tweak to fake_input.cc to make polymorphically typed
fake inputs infer if their type attr is not set but has a default.
Unfortunately, adding a second valid type to image_summary *disables* automatic
implicit conversion from np.float64 to tf.float32, so this change is slightly
backwards incompatible.
Change 109636969
Add serialization operations for SparseTensor.
Change 109636644
Update generated Op docs.
Change 109634899
TensorFlow: add a markdown file for producing release notes for our
releases. Seed with 0.5.0 with a boring but accurate description.
Change 109634502
Let histogram_summary take any realnumbertype
It used to take only floats; now it understands ints.
Change 109634434
TensorFlow: update the locations where we mention Python 3 support so that
they reflect the current truth.
Change 109632108
Move HSV <> RGB conversions, grayscale conversions, and adjust_* ops back to tensorflow
- make GPU-capable version of RGBToHSV and HSVToRGB, allows only float input/output
- change docs to reflect new size constraints
- change HSV format to be [0,1] for all components
- add automatic dtype conversion for all adjust_* and grayscale conversion ops
- fix up docs
Change 109631077
Improve optimizer exceptions
1. grads_and_vars is now a tuple, so must be wrapped when passed to format.
2. Use '%r' instead of '%s' for dtype formatting
Base CL: 109697989
Diffstat (limited to 'tensorflow/stream_executor/scratch_allocator.h')
-rw-r--r-- | tensorflow/stream_executor/scratch_allocator.h | 83 |
1 files changed, 83 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/scratch_allocator.h b/tensorflow/stream_executor/scratch_allocator.h new file mode 100644 index 0000000000..52697d6f8e --- /dev/null +++ b/tensorflow/stream_executor/scratch_allocator.h @@ -0,0 +1,83 @@ +/* Copyright 2015 Google Inc. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_STREAM_EXECUTOR_SCRATCH_ALLOCATOR_H_ +#define TENSORFLOW_STREAM_EXECUTOR_SCRATCH_ALLOCATOR_H_ + +#include <memory> + +#include "tensorflow/stream_executor/device_memory.h" +#include "tensorflow/stream_executor/lib/statusor.h" +#include "tensorflow/stream_executor/platform/port.h" +#include "tensorflow/stream_executor/temporary_device_memory.h" + +namespace perftools { +namespace gputools { + +class Stream; + +// Interface that allows stream operations (e.g. +// Stream::ThenConvolveWithScratch) to optionally request scratch space be +// allocated in order to speed up the operation being enqueued. +// +// Note that the caller is responsible for deallocating the scratch space at a +// known-safe point, when all scratch-memory-consuming kernels are known for +// sure to have finished; e.g. at stream synchronization time. This is different +// from a traditional C++ object allocator, where the client is responsible for +// releasing. (Conceptually, scratch memory is a form of "temporary" device +// memory allocation.) 
+class ScratchAllocator { + public: + virtual ~ScratchAllocator(); + + // Returns a limit of memory this scratch allocator wants to produce, in + // bytes. This information may be used to help select an algorithm. + // + // A return value < 0 indicates that there is no recommended limit. + virtual int64 GetMemoryLimitInBytes(Stream* stream) = 0; + + // Returns an allocation of byte_size bytes for use in an operation on stream. + // + // This is a temporary allocation, and the caller is responsible for + // deallocating at some known-safe point. See the class comment above. + virtual port::StatusOr<DeviceMemory<uint8>> AllocateBytes( + Stream* stream, int64 byte_size) = 0; +}; + +// Allocates a single temporary memory allocation -- this memory is deallocated +// at the next stream synchronization point after this object has gone out of +// scope. This satisfies the lifetime and deallocation properties given in the +// ScratchAllocator class comment above. +// +// Thread-compatible, but not thread-safe (use in scenarios where only one +// thread will request the scratch allocation). +class OneTimeScratchAllocator : public ScratchAllocator { + public: + OneTimeScratchAllocator(); + ~OneTimeScratchAllocator() override; + int64 GetMemoryLimitInBytes(Stream* stream) override; + port::StatusOr<DeviceMemory<uint8>> AllocateBytes(Stream* stream, + int64 byte_size) override; + + private: + std::unique_ptr<TemporaryDeviceMemory<uint8>> temporary_; + + SE_DISALLOW_COPY_AND_ASSIGN(OneTimeScratchAllocator); +}; + +} // namespace gputools +} // namespace perftools + +#endif // TENSORFLOW_STREAM_EXECUTOR_SCRATCH_ALLOCATOR_H_ |