aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/stream_executor/cuda/cuda_timer.h
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/stream_executor/cuda/cuda_timer.h')
-rw-r--r--tensorflow/stream_executor/cuda/cuda_timer.h69
1 files changed, 69 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/cuda/cuda_timer.h b/tensorflow/stream_executor/cuda/cuda_timer.h
new file mode 100644
index 0000000000..e49e212403
--- /dev/null
+++ b/tensorflow/stream_executor/cuda/cuda_timer.h
@@ -0,0 +1,69 @@
+// Defines the CUDATimer type - the CUDA-specific implementation of the generic
+// StreamExecutor Timer interface.
+
+#ifndef TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_TIMER_H_
+#define TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_TIMER_H_
+
+#include "tensorflow/stream_executor/stream_executor_internal.h"
+#include "tensorflow/stream_executor/cuda/cuda_driver.h"
+#include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h"
+
+namespace perftools {
+namespace gputools {
+namespace cuda {
+
+class CUDAExecutor;
+class CUDAStream;
+
+// Wraps a pair of CUevents in order to satisfy the platform-independent
+// TimerInferface -- both a start and a stop event are present which may be
+// recorded in a stream.
+class CUDATimer : public internal::TimerInterface {
+ public:
+ explicit CUDATimer(CUDAExecutor *parent)
+ : parent_(parent), start_event_(nullptr), stop_event_(nullptr) {}
+
+ // Note: teardown is explicitly handled in this API by a call to
+ // StreamExecutor::DeallocateTimer(), which invokes Destroy().
+ ~CUDATimer() override {}
+
+ // Allocates the platform-specific pieces of the timer, called as part of
+ // StreamExecutor::AllocateTimer().
+ bool Init();
+
+ // Deallocates the platform-specific pieces of the timer, called as part of
+ // StreamExecutor::DeallocateTimer().
+ void Destroy();
+
+ // Records the "timer start" event at the current point in the stream.
+ bool Start(CUDAStream *stream);
+
+ // Records the "timer stop" event at the current point in the stream.
+ bool Stop(CUDAStream *stream);
+
+ // Returns the elapsed time, in milliseconds, between the start and stop
+ // events.
+ float GetElapsedMilliseconds() const;
+
+ // See perftools::gputools::Timer::Microseconds().
+ // TODO(leary) make this into an error code interface...
+ uint64 Microseconds() const override {
+ return GetElapsedMilliseconds() * 1e3;
+ }
+
+ // See perftools::GPUTools::Timer::Nanoseconds().
+ uint64 Nanoseconds() const override { return GetElapsedMilliseconds() * 1e6; }
+
+ private:
+ CUDAExecutor *parent_;
+ CUevent start_event_; // Event recorded to indicate the "start" timestamp
+ // executing in a stream.
+ CUevent stop_event_; // Event recorded to indicate the "stop" timestamp
+ // executing in a stream.
+};
+
+} // namespace cuda
+} // namespace gputools
+} // namespace perftools
+
+#endif // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_TIMER_H_