diff options
author | Peter Hawkins <phawkins@google.com> | 2017-01-09 12:04:37 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-01-09 12:26:35 -0800 |
commit | 1e67c90e2caceeff82d09793d1ef5fa0300d219b (patch) | |
tree | 6567ea8b0fa01fcfcd608b7e4c636865d33c7032 /tensorflow/compiler/xla/service/backend.h | |
parent | 7ad7e4dfae4344d6b955b5eb61dc4b6bb792f1b3 (diff) |
Initial open-source release of XLA: Accelerated Linear Algebra.
XLA is a compiler-based linear algebra execution engine that targets CPUs, GPUs and custom accelerators.
XLA is still experimental; we are releasing it early to get the community involved.
Change: 143990941
Diffstat (limited to 'tensorflow/compiler/xla/service/backend.h')
-rw-r--r-- | tensorflow/compiler/xla/service/backend.h | 191 |
1 files changed, 191 insertions, 0 deletions
diff --git a/tensorflow/compiler/xla/service/backend.h b/tensorflow/compiler/xla/service/backend.h new file mode 100644 index 0000000000..17c53d299e --- /dev/null +++ b/tensorflow/compiler/xla/service/backend.h @@ -0,0 +1,191 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_ + +#include <map> +#include <memory> +#include <string> +#include <vector> + +#include "tensorflow/compiler/xla/service/compiler.h" +#include "tensorflow/compiler/xla/service/device_memory_allocator.h" +#include "tensorflow/compiler/xla/service/transfer_manager.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/stream_executor_no_cuda.h" +#include "tensorflow/core/platform/thread_annotations.h" + +namespace Eigen { +class ThreadPoolDevice; +} + +namespace xla { + +// Class which encapsulates an XLA backend. It includes everything necessary +// to compile and execute computations on a particular platform. +// +// It also offers a pooling API for creation/use of initialized streams: +// +// std::unique_ptr<se::Stream> stream = +// backend->AcquireStream().ConsumeValueOrDie(); +// // ... use stream ... +// backend->ReleaseStream(std::move(stream)); +class Backend { + public: + // The number of streams we create for the pool at initialization time. + static constexpr int kInitialStreamsToPool = 8; + + // Creates a new backend for the given platform with the given number of + // replicas. A value of -1 means to use the flag value. + static StatusOr<std::unique_ptr<Backend>> CreateBackend( + perftools::gputools::Platform* platform, int64 replica_count = -1); + + // Creates a backend for the default platform. The default platform is defined + // in PlatformUtil. + static StatusOr<std::unique_ptr<Backend>> CreateDefaultBackend(); + + ~Backend(); + + // Accessors for the various objects. + perftools::gputools::Platform* platform() const { return platform_; } + Compiler* compiler() const { return compiler_; } + DeviceMemoryAllocator* memory_allocator() const { + return memory_allocator_.get(); + } + TransferManager* transfer_manager() const { return transfer_manager_; } + + // Returns the number of devices of the platform type which are visible. Not + // all of these devices may be usable by XLA. + int device_count() const { return stream_executors_.size(); } + + // Returns the device ordinal number of the default device. + int default_device_ordinal() const; + + // Returns stream executors of all supported devices for this backend. The + // executors are ordered by the device ordinal. + const std::vector<perftools::gputools::StreamExecutor*>& stream_executors() + const { + return stream_executors_; + } + + // Returns the replicas for the default stream executor. + // + // When the number of replicas is R, the first R stream executors are assigned + // to the replicas of the default stream executor. + std::vector<perftools::gputools::StreamExecutor*> Replicas() const; + + // Returns the replicas for the given device_ordinal. The given device ordinal + // is considered to be the first device ordinal among the replicas. Returns an + // error status if the stream executor for the given given device ordinal does + // not exist or if there are not enough stream executors for the replicas. + StatusOr<std::vector<perftools::gputools::StreamExecutor*>> Replicas( + int device_ordinal) const; + + // Return the stream executor for the given device ordinal. + StatusOr<perftools::gputools::StreamExecutor*> stream_executor( + int device_ordinal) const; + + // Return the stream executor for the default device ordinal. + perftools::gputools::StreamExecutor* default_stream_executor() const { + CHECK(!stream_executors_.empty()); + return stream_executors_[0]; + } + + // Primes the internal pool of streams for AcquireStream/ReleaseStream with n + // initialized stream instances. + tensorflow::Status PoolStreams(int n, + perftools::gputools::StreamExecutor* executor); + + // Acquires a stream for use by the caller, either by grabbing it from an + // internal pool, or by constructing/initializating it, and returns the result + // to the caller. + // + // TODO(b/32989582): Return std::unique_ptr with custom deleter. + StatusOr<std::unique_ptr<perftools::gputools::Stream>> AcquireStream( + perftools::gputools::StreamExecutor* executor); + + // Releases a stream from the caller to the internal pool, for use with the + // paired AcquireStream above. + void ReleaseStream(std::unique_ptr<perftools::gputools::Stream> stream); + + // Returns whether the given device ordinal of the backend is supported. + bool device_ordinal_supported(int device_ordinal) const { + return (device_ordinal >= 0 && device_ordinal < device_count() && + stream_executors_[device_ordinal] != nullptr); + } + + // Return a string identifier for the given device, eg: "GPU:3". + string device_name(int device_ordinal) const { + return tensorflow::strings::StrCat(platform_->Name(), ":", device_ordinal); + } + + // Returns true if the devices with the given ordinals are equivalent from + // XLA's perspective. That is, an executable compiled for one device would + // be equivalent to an executable compiled for the other. + StatusOr<bool> devices_equivalent(int device_ordinal_a, int device_ordinal_b); + + // For the host platform, returns the threadpool to use when scheduling + // parallel operators. For other platforms, returns NULL. + tensorflow::thread::ThreadPool* inter_op_thread_pool() const; + + // For the host platform, returns the configured eigen threadpool device to be + // used for scheduling work. For other platforms, returns NULL. + const Eigen::ThreadPoolDevice* eigen_intra_op_thread_pool_device() const; + + private: + struct EigenThreadPoolWrapper; + Backend(int64 replica_count, perftools::gputools::Platform* platform, + Compiler* compiler, + tensorflow::gtl::ArraySlice<perftools::gputools::StreamExecutor*> + stream_executors, + TransferManager* transfer_manager); + Backend(const Backend&) = delete; + Backend& operator=(const Backend&) = delete; + + perftools::gputools::Platform* platform_; + Compiler* compiler_; + TransferManager* transfer_manager_; + int64 replica_count_ = -1; + + // Vector of stream executors. stream_executors_[0] is the default executor. + std::vector<perftools::gputools::StreamExecutor*> stream_executors_; + + // Guards the mutable state in the backend object. + tensorflow::mutex mutex_; + + // Mapping from stream executor to cached streams, used by + // AcquireStream/ReleaseStream above. + std::map<perftools::gputools::StreamExecutor*, + std::vector<std::unique_ptr<perftools::gputools::Stream>>> + cached_streams_ GUARDED_BY(mutex_); + + // The default memory allocator to use. + std::unique_ptr<StreamExecutorMemoryAllocator> memory_allocator_; + + // For the CPU backend, a threadpool for scheduling parallel operators. + std::unique_ptr<tensorflow::thread::ThreadPool> inter_op_thread_pool_; + + // For the CPU backend, an Eigen threadpool device for use by Eigen code. + std::unique_ptr<EigenThreadPoolWrapper> intra_op_thread_pool_wrapper_; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_BACKEND_H_ |