diff options
Diffstat (limited to 'tensorflow/core/common_runtime/gpu/gpu_device.h')
-rw-r--r-- | tensorflow/core/common_runtime/gpu/gpu_device.h | 36 |
1 file changed, 23 insertions, 13 deletions
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h index 56d03d7a8c..674e8384d5 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_device.h +++ b/tensorflow/core/common_runtime/gpu/gpu_device.h @@ -65,6 +65,11 @@ class BaseGPUDevice : public LocalDevice { // completes. bool RequiresRecordingAccessedTensors() const override; + // GPU kernel execution requires us to use `tracing::ScopedAnnotation()` + // rather than `tracing::ScopedActivity()`, in order to relate asynchronously + // launched GPU kernels to the OpKernel. + bool TraceUsingAnnotations() const { return true; } + void ConsumeListOfAccessedTensors( DeviceContext* device_context, const TensorReferenceVector& tensor_refs) override; @@ -86,15 +91,16 @@ class BaseGPUDevice : public LocalDevice { // The caller owns the returned device. PerOpGpuDevice* MakeGpuDevice() override; - void ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device, - DeviceContext* dc, Allocator* allocator) override; + Status ReinitializeGpuDevice(OpKernelContext* context, PerOpGpuDevice* device, + DeviceContext* dc, + Allocator* allocator) override; - // Returns the CUDA GPU id of this device within the native driver system; + // Returns the platform GPU id of this device within the native driver system; // e.g., for CUDA this is the ordinal of the GPU within the system. 
int gpu_id() const { - CudaGpuId cuda_gpu_id; - TF_CHECK_OK(GpuIdManager::TfToCudaGpuId(tf_gpu_id_, &cuda_gpu_id)); - return cuda_gpu_id.value(); + PlatformGpuId platform_gpu_id; + TF_CHECK_OK(GpuIdManager::TfToPlatformGpuId(tf_gpu_id_, &platform_gpu_id)); + return platform_gpu_id.value(); } // The executor that provides control for the device; e.g., for CUDA this @@ -125,6 +131,7 @@ class BaseGPUDevice : public LocalDevice { class StreamGroupFactory; gtl::InlinedVector<StreamGroup*, 4> streams_; + mutex scratch_init_mutex_; gtl::InlinedVector<char*, 4> scratch_; std::vector<GPUDeviceContext*> device_contexts_; GpuDeviceInfo* gpu_device_info_ = nullptr; @@ -135,6 +142,9 @@ class BaseGPUDevice : public LocalDevice { std::unique_ptr<EventMgr> em_; std::unique_ptr<thread::ThreadPool> thread_pool_; + // Initialize scratch buffers used by Eigen. + Status InitScratchBuffers(); + void ReinitializeDevice(OpKernelContext* context, PerOpGpuDevice* device, int stream_id, Allocator* allocator); @@ -168,14 +178,14 @@ class BaseGPUDeviceFactory : public DeviceFactory { int32 strength; static const int kSameDeviceStrength; static const int kStreamExecutorStrength; - std::set<std::pair<CudaGpuId, CudaGpuId>> directed_links; + std::set<std::pair<PlatformGpuId, PlatformGpuId>> directed_links; }; protected: // Populates *maps with interconnect maps for all local direct access // pathways between GPUs. 
virtual Status GetInterconnectMaps( - const std::vector<CudaGpuId>& visible_gpu_order, + const std::vector<PlatformGpuId>& visible_gpu_order, se::Platform* gpu_manager, std::vector<InterconnectMap>* maps); struct TfGpuIdHash { @@ -207,16 +217,16 @@ class BaseGPUDeviceFactory : public DeviceFactory { Allocator* gpu_allocator, Allocator* cpu_allocator) = 0; - // Returns into 'ids' the list of valid CUDA GPU ids, in the order that + // Returns into 'ids' the list of valid platform GPU ids, in the order that // they should map to TF GPU ids "/device:GPU:0", "/device:GPU:1", etc, // based upon 'visible_gpu_order' which was generated by parsing // GPUOptions::visible_device_list which is a comma-separated list of CUDA GPU // ids. - Status GetValidDeviceIds(const std::vector<CudaGpuId>& visible_gpu_order, - std::vector<CudaGpuId>* ids); + Status GetValidDeviceIds(const std::vector<PlatformGpuId>& visible_gpu_order, + std::vector<PlatformGpuId>* ids); - // visible_gpu_initialized_[cuda_gpu_id] is true if visible GPU cuda_gpu_id - // has been initialized by the process. + // visible_gpu_initialized_[platform_gpu_id] is true if visible GPU + // platform_gpu_id has been initialized by the process. std::unordered_map<int, bool> visible_gpu_initialized_; }; |