diff options
Diffstat (limited to 'tensorflow/core/common_runtime/gpu/gpu_process_state.h')
-rw-r--r-- | tensorflow/core/common_runtime/gpu/gpu_process_state.h | 121 |
1 file changed, 121 insertions, 0 deletions
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_PROCESS_STATE_H_
#define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_PROCESS_STATE_H_

#include <functional>
#include <map>
#include <unordered_map>
#include <vector>

#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
#include "tensorflow/core/common_runtime/process_state.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/thread_annotations.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/protobuf/config.pb.h"

namespace tensorflow {

class Allocator;
class VisitableAllocator;
class PoolAllocator;

// Singleton that manages per-process state when GPUs are present.
//
// Thread-safety: mutable containers below are protected by mu_; the
// gpu_device_enabled_ flag is deliberately read/written without locking
// (see NO_THREAD_SAFETY_ANALYSIS on its accessors).
class GPUProcessState {
 public:
  // Returns the process-wide instance (created on first use).
  static GPUProcessState* singleton();

  // Query whether any GPU device has been created so far.
  // Disable thread safety analysis since a race is benign here.
  bool HasGPUDevice() const NO_THREAD_SAFETY_ANALYSIS {
    return gpu_device_enabled_;
  }

  // Set the flag to indicate a GPU device has been created.
  // Disable thread safety analysis since a race is benign here.
  void EnableGPUDevice() NO_THREAD_SAFETY_ANALYSIS {
    gpu_device_enabled_ = true;
  }

  // Returns the one GPU allocator used for the indexed GPU.
  // Note that this is a system GPU index, not (necessarily) a brain
  // device index.
  //
  // 'total_bytes' is the total number of bytes that should be made
  // available to the allocator. The first call to this function for
  // a given tf_gpu_id creates the allocator, so only the total_bytes
  // used on that first call is used.
  //
  // "Allocator type" describes the type of algorithm to use for the
  // underlying allocator. REQUIRES: Must be a valid type (see
  // config.proto for the list of supported strings.).
  //
  // REQUIRES: tf_gpu_id must be a valid id for a BaseGPUDevice available in
  // the current system environment. Otherwise returns nullptr.
  virtual Allocator* GetGPUAllocator(const GPUOptions& options,
                                     TfGpuId tf_gpu_id, size_t total_bytes);

  // Returns the allocator for CUDA pinned host memory closest to the
  // given NUMA node. Created lazily on first request per node.
  virtual Allocator* GetCUDAHostAllocator(int numa_node);

  // Registers a function to be called once on every new Region
  // allocated by every GPURegionAllocator proximate to the specified
  // bus. The AllocVisitor is provided with a memory pointer and the
  // size of the area it identifies. The pointer is not guaranteed to
  // be valid after the call terminates. The intention is for this
  // interface to be used for network device memory registration.
  // "bus_id" is platform-specific. On many platforms it
  // should be 0. On machines with multiple PCIe buses, it should be
  // the index of one of the PCIe buses. If the bus_id is invalid,
  // results are undefined.
  typedef std::function<void(void*, size_t)> AllocVisitor;
  virtual void AddGPUAllocVisitor(int bus_id, const AllocVisitor& visitor);

 protected:
  // Constructed only via singleton(); subclassed in tests.
  GPUProcessState();

  // Helper method for unit tests to reset the ProcessState singleton by
  // cleaning up everything. Never use in production.
  virtual void TestOnlyReset();

  // Accessor to the shared ProcessState memory-descriptor map, or
  // nullptr if no ProcessState has been attached.
  ProcessState::MDMap* mem_desc_map() {
    if (process_state_) return &process_state_->mem_desc_map_;
    return nullptr;
  }

  static GPUProcessState* instance_;
  ProcessState* process_state_;  // Not owned.
  bool gpu_device_enabled_;      // See HasGPUDevice() re: benign races.

  mutex mu_;  // Guards the allocator/visitor vectors below.

  // One entry per TF GPU id; created lazily by GetGPUAllocator().
  std::vector<VisitableAllocator*> gpu_allocators_ GUARDED_BY(mu_);
  // Per-bus lists of visitors registered via AddGPUAllocVisitor().
  std::vector<std::vector<AllocVisitor>> gpu_visitors_ GUARDED_BY(mu_);
  // One entry per NUMA node; created lazily by GetCUDAHostAllocator().
  std::vector<Allocator*> cuda_host_allocators_ GUARDED_BY(mu_);

  virtual ~GPUProcessState();

  // Optional RecordingAllocators that wrap the corresponding
  // Allocators for runtime attribute use analysis.
  std::vector<Allocator*> gpu_al_ GUARDED_BY(mu_);
  std::vector<Allocator*> cuda_al_ GUARDED_BY(mu_);

  friend class GPUDeviceTest;
};

}  // namespace tensorflow
#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_PROCESS_STATE_H_