about summary refs log tree commit diff homepage
path: root/tensorflow/core/common_runtime/gpu/gpu_device.h
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/common_runtime/gpu/gpu_device.h')
-rw-r--r--  tensorflow/core/common_runtime/gpu/gpu_device.h  94
1 file changed, 94 insertions, 0 deletions
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h
new file mode 100644
index 0000000000..a415224d95
--- /dev/null
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.h
@@ -0,0 +1,94 @@
+#if !GOOGLE_CUDA
+#error This file must only be included when building with Cuda support
+#endif
+
+#ifndef TENSORFLOW_COMMON_RUNTIME_GPU_GPU_DEVICE_H_
+#define TENSORFLOW_COMMON_RUNTIME_GPU_GPU_DEVICE_H_
+
+#include "tensorflow/core/common_runtime/device_factory.h"
+#include "tensorflow/core/common_runtime/gpu_device_context.h"
+#include "tensorflow/core/common_runtime/local_device.h"
+#include "tensorflow/core/framework/allocator.h"
+#include "tensorflow/core/framework/device_base.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/platform/port.h"
+#include "tensorflow/core/public/session_options.h"
+#include "tensorflow/core/public/status.h"
+#include "tensorflow/core/public/tensor.h"
+#include "tensorflow/core/common_runtime/gpu/gpu_event_mgr.h"
+#include "tensorflow/stream_executor/stream.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+namespace tensorflow {
+
+class EigenAllocator;
+
+// Abstract base class for GPU devices.  Holds the per-device execution
+// state (compute streams, per-stream device contexts, event manager)
+// and implements the Device compute entry points.  Concrete,
+// platform-specific subclasses are produced by a BaseGPUDeviceFactory.
+class BaseGPUDevice : public LocalDevice {
+ public:
+  // `gpu_allocator` allocates device (GPU) memory and `cpu_allocator`
+  // host memory; neither is owned here (see the protected members).
+  BaseGPUDevice(const SessionOptions& options, const string& name,
+                Bytes memory_limit, BusAdjacency bus_adjacency, int gpu_id,
+                const string& physical_device_desc, Allocator* gpu_allocator,
+                Allocator* cpu_allocator);
+
+  ~BaseGPUDevice() override;
+
+  // GPU devices require the Op Compute method to save a reference to
+  // any temporary tensors that are allocated until the Op execution
+  // completes.
+  bool SaveTemporaryTensors() const override { return true; }
+
+  // Fills `device_context_map` with a GPUDeviceContext for the nodes
+  // of `graph`.  NOTE(review): the stream-assignment policy is
+  // implemented in the .cc file and is not visible here.
+  Status FillContextMap(const Graph* graph,
+                        DeviceContextMap* device_context_map);
+
+  // Executes `op_kernel` on this device.
+  void Compute(OpKernel* op_kernel, OpKernelContext* context) override;
+
+  // Waits for the device to finish pending work (Device::Sync
+  // contract).
+  Status Sync() override;
+
+  // Executes the asynchronous `op_kernel`; `done` is invoked when the
+  // kernel's computation completes.
+  void ComputeAsync(AsyncOpKernel* op_kernel, OpKernelContext* context,
+                    AsyncOpKernel::DoneCallback done) override;
+
+  // Materializes `tensor_proto` into `*tensor`, placing the result in
+  // GPU or CPU memory as requested by `alloc_attrs`.
+  Status MakeTensorFromProto(const TensorProto& tensor_proto,
+                             const AllocatorAttributes alloc_attrs,
+                             Tensor* tensor) override;
+
+  // The caller owns the returned device.
+  const PerOpGpuDevice* MakeGpuDevice(DeviceContext* dc,
+                                      Allocator* allocator) override;
+
+ protected:
+  Allocator* gpu_allocator_;  // not owned
+  Allocator* cpu_allocator_;  // not owned
+
+ private:
+  // Streams on which work is issued, and the device contexts handed
+  // out via FillContextMap.  NOTE(review): presumably one context per
+  // stream -- confirm against the .cc file.
+  std::vector<gpu::Stream*> streams_;
+  std::vector<GPUDeviceContext*> device_contexts_;
+  // Device info set up by the implementation; not owned semantics and
+  // exact contents are established in the .cc file -- TODO confirm.
+  GpuDeviceInfo* gpu_device_info_ = nullptr;
+  // NOTE(review): presumably serializes kernel tracing/logging state;
+  // what it guards is not visible in this header -- confirm in .cc.
+  mutex trace_mu_;
+  int gpu_id_ = -1;  // index of the physical GPU backing this device
+  // Event manager; presumably used to defer cleanup (e.g. of the
+  // temporary tensors retained per SaveTemporaryTensors) until the
+  // GPU has finished with them -- see gpu_event_mgr.h.
+  std::unique_ptr<EventMgr> em_;
+
+  // Returns a newly allocated per-op device for the stream with id
+  // `stream_id`; the caller owns the result (cf. MakeGpuDevice).
+  const PerOpGpuDevice* NewDevice(int stream_id, Allocator* allocator);
+};
+
+// Factory for GPU devices: enumerates the usable GPU ids and creates
+// one device per id under `name_prefix`.  Subclasses supply the
+// concrete device type via the pure-virtual CreateGPUDevice overload.
+class BaseGPUDeviceFactory : public DeviceFactory {
+ public:
+  // Appends one newly created device per valid GPU id to `devices`.
+  void CreateDevices(const SessionOptions& options, const string& name_prefix,
+                     std::vector<Device*>* devices) override;
+
+ private:
+  // Creates the device for a single `gpu_id`.  NOTE(review): the
+  // memory-limit / bus-adjacency computation that feeds the virtual
+  // overload below lives in the .cc file -- not visible here.
+  LocalDevice* CreateGPUDevice(const SessionOptions& options,
+                               const string& name, int gpu_id);
+
+  // Implemented by subclasses to construct the concrete device type
+  // from the fully resolved parameters.
+  virtual LocalDevice* CreateGPUDevice(const SessionOptions& options,
+                                       const string& name, Bytes memory_limit,
+                                       BusAdjacency bus_adjacency, int gpu_id,
+                                       const string& physical_device_desc,
+                                       Allocator* gpu_allocator,
+                                       Allocator* cpu_allocator) = 0;
+
+  // Fills `ids` with the ids of GPUs considered valid for use.
+  void GetValidDeviceIds(std::vector<int>* ids);
+};
+
+} // namespace tensorflow
+
+#endif // TENSORFLOW_COMMON_RUNTIME_GPU_GPU_DEVICE_H_