Diffstat (limited to 'tensorflow/core/common_runtime/gpu/gpu_process_state.h')
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_process_state.h  121
1 file changed, 121 insertions, 0 deletions
diff --git a/tensorflow/core/common_runtime/gpu/gpu_process_state.h b/tensorflow/core/common_runtime/gpu/gpu_process_state.h
new file mode 100644
index 0000000000..cb41c3c6bd
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_process_state.h
@@ -0,0 +1,121 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_PROCESS_STATE_H_
+#define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_PROCESS_STATE_H_
+
+#include <functional>
+#include <map>
+#include <unordered_map>
+#include <vector>
+
+#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
+#include "tensorflow/core/common_runtime/process_state.h"
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/platform/mutex.h"
+#include "tensorflow/core/platform/thread_annotations.h"
+#include "tensorflow/core/platform/types.h"
+#include "tensorflow/core/protobuf/config.pb.h"
+
+namespace tensorflow {
+
+class Allocator;
+class VisitableAllocator;
+class PoolAllocator;
+
+// Singleton that manages per-process state when GPUs are present.
+class GPUProcessState {
+ public:
+ static GPUProcessState* singleton();
+
+ // Query whether any GPU device has been created so far.
+ // Disable thread safety analysis since a race is benign here.
+ bool HasGPUDevice() const NO_THREAD_SAFETY_ANALYSIS {
+ return gpu_device_enabled_;
+ }
+
+ // Set the flag to indicate a GPU device has been created.
+ // Disable thread safety analysis since a race is benign here.
+ void EnableGPUDevice() NO_THREAD_SAFETY_ANALYSIS {
+ gpu_device_enabled_ = true;
+ }
+
+ // Returns the one GPU allocator used for the indexed GPU.
+ // Note that this is a system GPU index, not (necessarily) a brain
+ // device index.
+ //
+  // 'total_bytes' is the total number of bytes that should be made
+  // available to the allocator. The first call to this function for
+  // a given tf_gpu_id creates the allocator, so only the total_bytes
+  // passed on that first call has any effect.
+ //
+ // "Allocator type" describes the type of algorithm to use for the
+ // underlying allocator. REQUIRES: Must be a valid type (see
+ // config.proto for the list of supported strings.).
+ //
+ // REQUIRES: tf_gpu_id must be a valid id for a BaseGPUDevice available in the
+ // current system environment. Otherwise returns nullptr.
+ virtual Allocator* GetGPUAllocator(const GPUOptions& options,
+ TfGpuId tf_gpu_id, size_t total_bytes);
+
+ virtual Allocator* GetCUDAHostAllocator(int numa_node);
+
+ // Registers a function to be called once on every new Region
+ // allocated by every GPURegionAllocator proximate to the specified
+ // bus. The AllocVisitor is provided with a memory pointer and the
+ // size of the area it identifies. The pointer is not guaranteed to
+ // be valid after the call terminates. The intention is for this
+ // interface to be used for network device memory registration.
+ // "bus_id" is platform-specific. On many platforms it
+ // should be 0. On machines with multiple PCIe buses, it should be
+ // the index of one of the PCIe buses. If the bus_id is invalid,
+ // results are undefined.
+ typedef std::function<void(void*, size_t)> AllocVisitor;
+ virtual void AddGPUAllocVisitor(int bus_id, const AllocVisitor& visitor);
+
+ protected:
+ GPUProcessState();
+
+  // Helper method for unit tests to reset the GPUProcessState singleton by
+  // cleaning up everything. Never use in production.
+ virtual void TestOnlyReset();
+
+ ProcessState::MDMap* mem_desc_map() {
+ if (process_state_) return &process_state_->mem_desc_map_;
+ return nullptr;
+ }
+
+ static GPUProcessState* instance_;
+ ProcessState* process_state_; // Not owned.
+ bool gpu_device_enabled_;
+
+ mutex mu_;
+
+ std::vector<VisitableAllocator*> gpu_allocators_ GUARDED_BY(mu_);
+ std::vector<std::vector<AllocVisitor>> gpu_visitors_ GUARDED_BY(mu_);
+ std::vector<Allocator*> cuda_host_allocators_ GUARDED_BY(mu_);
+
+ virtual ~GPUProcessState();
+
+ // Optional RecordingAllocators that wrap the corresponding
+ // Allocators for runtime attribute use analysis.
+ std::vector<Allocator*> gpu_al_ GUARDED_BY(mu_);
+ std::vector<Allocator*> cuda_al_ GUARDED_BY(mu_);
+
+ friend class GPUDeviceTest;
+};
+
+} // namespace tensorflow
+#endif // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_PROCESS_STATE_H_
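For readers of this change, a minimal usage sketch of the allocator accessors declared above. It assumes a machine where TfGpuId 0 maps to a valid BaseGPUDevice; the function name ExampleAllocate, the 1 GiB budget, and the 4 KiB request sizes are illustrative only and not part of this header:

#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
#include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/protobuf/config.pb.h"

namespace tensorflow {

// Illustrative only: allocates and frees one GPU buffer and one pinned
// host buffer through the process-wide singleton.
void ExampleAllocate() {
  GPUProcessState* ps = GPUProcessState::singleton();

  // The first call for a given TfGpuId creates the allocator; the
  // 1 GiB budget here is an assumed value and is ignored on later calls.
  GPUOptions options;
  Allocator* gpu_allocator =
      ps->GetGPUAllocator(options, TfGpuId(0), 1ULL << 30);

  // CUDA pinned host memory on NUMA node 0, suitable for fast DMA copies.
  Allocator* host_allocator = ps->GetCUDAHostAllocator(0);

  void* dev_buf =
      gpu_allocator->AllocateRaw(Allocator::kAllocatorAlignment, 4096);
  void* host_buf =
      host_allocator->AllocateRaw(Allocator::kAllocatorAlignment, 4096);

  gpu_allocator->DeallocateRaw(dev_buf);
  host_allocator->DeallocateRaw(host_buf);
}

}  // namespace tensorflow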
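Similarly, a sketch of AddGPUAllocVisitor. The RegisterWithNic hook and the bus id 0 are assumptions for illustration (e.g. an RDMA-capable NIC driver on a single-bus machine); since the visitor is only invoked for regions allocated after registration, it should be installed before the first GetGPUAllocator call for devices on that bus:

#include <cstddef>

#include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"

namespace tensorflow {

// Hypothetical hook, e.g. pinning GPU memory regions for a NIC.
void RegisterWithNic(void* ptr, size_t num_bytes);

// Installs a visitor for GPU memory regions proximate to PCIe bus 0
// (an assumed bus id; on many platforms 0 is the only choice).
void InstallGpuRegionVisitor() {
  GPUProcessState::singleton()->AddGPUAllocVisitor(
      /*bus_id=*/0, [](void* ptr, size_t num_bytes) {
        // Each new Region allocated by a GPU allocator on this bus is
        // reported once with its base pointer and size.
        RegisterWithNic(ptr, num_bytes);
      });
}

}  // namespace tensorflow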