1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
#if !GOOGLE_CUDA
#error This file must only be included when building with Cuda support
#endif
#ifndef TENSORFLOW_COMMON_RUNTIME_GPU_GPU_DEVICE_H_
#define TENSORFLOW_COMMON_RUNTIME_GPU_GPU_DEVICE_H_
#include "tensorflow/core/common_runtime/device_factory.h"
#include "tensorflow/core/common_runtime/gpu_device_context.h"
#include "tensorflow/core/common_runtime/local_device.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/device_base.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/platform/port.h"
#include "tensorflow/core/public/session_options.h"
#include "tensorflow/core/public/status.h"
#include "tensorflow/core/public/tensor.h"
#include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
#include "tensorflow/stream_executor/stream.h"
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
namespace tensorflow {
class EigenAllocator;
// Common base for GPU devices. Holds the per-GPU streams, device
// contexts, allocators and event manager; concrete subclasses are
// instantiated through BaseGPUDeviceFactory::CreateGPUDevice.
class BaseGPUDevice : public LocalDevice {
public:
// Constructs a device named |name| bound to the physical GPU |gpu_id|.
// |gpu_allocator| and |cpu_allocator| are borrowed, not owned (see the
// member comments below).
BaseGPUDevice(const SessionOptions& options, const string& name,
Bytes memory_limit, BusAdjacency bus_adjacency, int gpu_id,
const string& physical_device_desc, Allocator* gpu_allocator,
Allocator* cpu_allocator);
~BaseGPUDevice() override;
// GPU devices require the Op Compute method to save a reference to
// any temporary tensors that are allocated until the Op execution
// completes.
bool SaveTemporaryTensors() const override { return true; }
// Populates |device_context_map| with a DeviceContext for the nodes of
// |graph|. NOTE(review): presumably draws from device_contexts_ below —
// confirm against the implementation.
Status FillContextMap(const Graph* graph,
DeviceContextMap* device_context_map);
// Runs |op_kernel| on this GPU (synchronous kernels).
void Compute(OpKernel* op_kernel, OpKernelContext* context) override;
// Blocks until outstanding work on this device has completed.
Status Sync() override;
// Runs |op_kernel| asynchronously; |done| is invoked on completion.
void ComputeAsync(AsyncOpKernel* op_kernel, OpKernelContext* context,
AsyncOpKernel::DoneCallback done) override;
// Materializes |tensor_proto| into |tensor| using the allocator selected
// by |alloc_attrs| (GPU or CPU memory).
Status MakeTensorFromProto(const TensorProto& tensor_proto,
const AllocatorAttributes alloc_attrs,
Tensor* tensor) override;
// The caller owns the returned device.
const PerOpGpuDevice* MakeGpuDevice(DeviceContext* dc,
Allocator* allocator) override;
protected:
Allocator* gpu_allocator_; // not owned
Allocator* cpu_allocator_; // not owned
private:
// Per-device compute streams and their matching device contexts.
// NOTE(review): looks like streams_ and device_contexts_ are parallel
// (indexed by stream id, cf. NewDevice) — confirm in the .cc file.
std::vector<gpu::Stream*> streams_;
std::vector<GPUDeviceContext*> device_contexts_;
GpuDeviceInfo* gpu_device_info_ = nullptr;
mutex trace_mu_;
int gpu_id_ = -1;
std::unique_ptr<EventMgr> em_;  // tracks GPU event completion
// Builds a PerOpGpuDevice for the stream identified by |stream_id|;
// caller takes ownership (see MakeGpuDevice above).
const PerOpGpuDevice* NewDevice(int stream_id, Allocator* allocator);
};
// Factory that enumerates usable GPUs and creates one BaseGPUDevice
// (via the subclass-provided CreateGPUDevice overload) per valid id.
class BaseGPUDeviceFactory : public DeviceFactory {
public:
// Appends newly created GPU devices to |devices|; device names are
// derived from |name_prefix|.
void CreateDevices(const SessionOptions& options, const string& name_prefix,
std::vector<Device*>* devices) override;
private:
// Convenience wrapper: resolves per-GPU parameters (memory limit, bus
// adjacency, description, allocators) and forwards to the virtual
// overload below. NOTE(review): inferred from the signatures — confirm
// against the implementation.
LocalDevice* CreateGPUDevice(const SessionOptions& options,
const string& name, int gpu_id);
// Subclass hook that constructs the concrete device type.
virtual LocalDevice* CreateGPUDevice(const SessionOptions& options,
const string& name, Bytes memory_limit,
BusAdjacency bus_adjacency, int gpu_id,
const string& physical_device_desc,
Allocator* gpu_allocator,
Allocator* cpu_allocator) = 0;
// Fills |ids| with the ids of GPUs this process may use.
void GetValidDeviceIds(std::vector<int>* ids);
};
} // namespace tensorflow
#endif // TENSORFLOW_COMMON_RUNTIME_GPU_GPU_DEVICE_H_
|