Diffstat (limited to 'tensorflow/stream_executor/cuda/cuda_stream.h')
-rw-r--r--  tensorflow/stream_executor/cuda/cuda_stream.h  74
1 file changed, 74 insertions(+), 0 deletions(-)
diff --git a/tensorflow/stream_executor/cuda/cuda_stream.h b/tensorflow/stream_executor/cuda/cuda_stream.h
new file mode 100644
index 0000000000..f6db64a1bf
--- /dev/null
+++ b/tensorflow/stream_executor/cuda/cuda_stream.h
@@ -0,0 +1,74 @@
+// Defines the CUDAStream type - the CUDA-specific implementation of the generic
+// StreamExecutor Stream interface.
+
+#ifndef TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_STREAM_H_
+#define TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_STREAM_H_
+
+#include "tensorflow/stream_executor/cuda/cuda_driver.h"
+#include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h"
+#include "tensorflow/stream_executor/stream_executor_internal.h"
+
+namespace perftools {
+namespace gputools {
+namespace cuda {
+
+class CUDAExecutor;
+
+// Wraps a CUstream in order to satisfy the platform-independent
+// StreamInterface.
+//
+// Thread-safe post-initialization.
+class CUDAStream : public internal::StreamInterface {
+ public:
+ explicit CUDAStream(CUDAExecutor *parent)
+ : parent_(parent), cuda_stream_(nullptr), completed_event_(nullptr) {}
+
+ // Note: teardown is handled by the parent executor's call to DeallocateStream.
+ ~CUDAStream() override {}
+
+ void *CudaStreamHack() override { return cuda_stream_; }
+ void **CudaStreamMemberHack() override {
+ return reinterpret_cast<void **>(&cuda_stream_);
+ }
+
+ // Explicitly initialize the CUDA resources associated with this stream, used
+ // by StreamExecutor::AllocateStream().
+ bool Init();
+
+ // Explicitly destroy the CUDA resources associated with this stream, used by
+ // StreamExecutor::DeallocateStream().
+ void Destroy();
+
+ // Returns true if no work is pending or executing on the stream.
+ bool IsIdle() const;
+
+ // Retrieves an event which indicates that all work enqueued into the stream
+ // has completed. Ownership of the event is not transferred to the caller;
+ // the event is owned by this stream.
+ bool GetOrCreateCompletedEvent(CUevent *completed_event);
+
+ // Returns the CUstream value for passing to the CUDA API.
+ //
+ // Precond: this CUDAStream has been allocated (otherwise passing a nullptr
+ // into the NVIDIA library causes difficult-to-understand faults).
+ CUstream cuda_stream() const {
+ DCHECK(cuda_stream_ != nullptr);
+ return const_cast<CUstream>(cuda_stream_);
+ }
+
+ CUDAExecutor *parent() const { return parent_; }
+
+ private:
+ mutex mu_; // Mutex that guards the completion event.
+ CUDAExecutor *parent_; // Executor that spawned this stream.
+ CUstream cuda_stream_; // Wrapped CUDA stream handle.
+
+ // Event that indicates this stream has completed.
+ CUevent completed_event_ GUARDED_BY(mu_);
+};
+
+} // namespace cuda
+} // namespace gputools
+} // namespace perftools
+
+#endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_STREAM_H_
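
The header above only declares Init(), Destroy(), and IsIdle(); their bodies live in cuda_stream.cc, which is not part of this diff. As a rough orientation, the sketch below shows what those operations correspond to in the raw CUDA driver API. It is an illustration only: the real implementation presumably routes these calls through the CUDADriver wrappers declared in cuda_driver.h (included above) and adds error logging, and the Sketch* names here are invented for the example.

#include <cuda.h>

// Sketch of Init(): create the wrapped CUstream; false signals a driver error.
bool SketchInit(CUstream *stream) {
  return cuStreamCreate(stream, CU_STREAM_DEFAULT) == CUDA_SUCCESS;
}

// Sketch of IsIdle(): the stream is idle when the driver reports that all
// previously enqueued work has finished (CUDA_ERROR_NOT_READY means busy).
bool SketchIsIdle(CUstream stream) {
  return cuStreamQuery(stream) == CUDA_SUCCESS;
}

// Sketch of Destroy(): release the completion event (if one was created) and
// then the stream itself, mirroring the member layout of CUDAStream.
void SketchDestroy(CUstream *stream, CUevent *completed_event) {
  if (*completed_event != nullptr) {
    cuEventDestroy(*completed_event);
    *completed_event = nullptr;
  }
  if (*stream != nullptr) {
    cuStreamDestroy(*stream);
    *stream = nullptr;
  }
}

Keeping Destroy() separate from the destructor matches the note above the destructor: the parent executor decides when to tear the stream down via DeallocateStream, so the destructor itself stays trivial.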
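
GetOrCreateCompletedEvent() promises lazy, thread-safe creation of a single CUevent owned by the stream. A minimal sketch of that pattern, using std::mutex in place of the StreamExecutor mutex/GUARDED_BY machinery and an invented SketchStream type, might look like this:

#include <cuda.h>
#include <mutex>

struct SketchStream {
  std::mutex mu;                      // guards completed_event
  CUstream cuda_stream = nullptr;     // wrapped CUDA stream handle
  CUevent completed_event = nullptr;  // owned by the stream, never by callers

  // Creates the event on first use; later calls hand back the same event.
  // Once recorded on cuda_stream, the event marks completion of all work
  // enqueued before the record. Disabling timing is an example flag choice.
  bool GetOrCreateCompletedEvent(CUevent *completed_out) {
    std::lock_guard<std::mutex> lock(mu);
    if (completed_event == nullptr &&
        cuEventCreate(&completed_event, CU_EVENT_DISABLE_TIMING) !=
            CUDA_SUCCESS) {
      return false;
    }
    *completed_out = completed_event;
    return true;
  }
};

Handing back a borrowed pointer rather than transferring ownership lets callers record and query the event repeatedly while leaving cleanup to the stream's own teardown path, which is why the header stresses that ownership stays with the stream.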