diff options
author | 2016-05-03 19:17:20 -0800 | |
---|---|---|
committer | 2016-05-03 20:21:43 -0700 | |
commit | 47916d41d12dff93b57544c1a5d09b7dcae93d84 (patch) | |
tree | f317475a5a8a481c17a45c38b02d2651ced9327f /tensorflow/stream_executor/cuda/cuda_stream.h | |
parent | 06cab2534f04965e9887ce3ea705154e5b1fc47d (diff) |
Change CUDAStream::GetOrCreateCompletedEvent from lazily initializing the
event to constructing the event in Init. This reduces CPU cost a bit by
avoiding a function call and mutex lock, at the expense of an allocated event
for streams that don't need one.
Change: 121444312
Diffstat (limited to 'tensorflow/stream_executor/cuda/cuda_stream.h')
-rw-r--r-- | tensorflow/stream_executor/cuda/cuda_stream.h | 5 |
1 files changed, 2 insertions, 3 deletions
```diff
diff --git a/tensorflow/stream_executor/cuda/cuda_stream.h b/tensorflow/stream_executor/cuda/cuda_stream.h
index cb12a094e6..0af2540cfc 100644
--- a/tensorflow/stream_executor/cuda/cuda_stream.h
+++ b/tensorflow/stream_executor/cuda/cuda_stream.h
@@ -60,7 +60,7 @@ class CUDAStream : public internal::StreamInterface {
   // Retrieves an event which indicates that all work enqueued into the stream
   // has completed. Ownership of the event is not transferred to the caller, the
   // event is owned by this stream.
-  bool GetOrCreateCompletedEvent(CUevent *completed_event);
+  CUevent* completed_event() { return &completed_event_; }
 
   // Returns the CUstream value for passing to the CUDA API.
   //
@@ -74,12 +74,11 @@ class CUDAStream : public internal::StreamInterface {
   CUDAExecutor *parent() const { return parent_; }
 
  private:
-  mutex mu_;  // mutex that guards the completion event.
   CUDAExecutor *parent_;  // Executor that spawned this stream.
   CUstream cuda_stream_;  // Wrapped CUDA stream handle.
 
   // Event that indicates this stream has completed.
-  CUevent completed_event_ GUARDED_BY(mu_);
+  CUevent completed_event_ = nullptr;
 };
 
 // Helper functions to simplify extremely common flows.
```