aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/stream_executor/cuda/cuda_timer.h
blob: e49e21240389aaf55e38ffff6e92b14012587227 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// Defines the CUDATimer type - the CUDA-specific implementation of the generic
// StreamExecutor Timer interface.

#ifndef TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_TIMER_H_
#define TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_TIMER_H_

#include "tensorflow/stream_executor/stream_executor_internal.h"
#include "tensorflow/stream_executor/cuda/cuda_driver.h"
#include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h"

namespace perftools {
namespace gputools {
namespace cuda {

// Forward declarations. Within this header CUDATimer refers to these types
// only through pointers (the parent executor it stores, and the streams passed
// to Start()/Stop()).
class CUDAExecutor;
class CUDAStream;

// Wraps a pair of CUevents in order to satisfy the platform-independent
// TimerInterface -- both a start and a stop event are present, each of which
// may be recorded in a stream.
class CUDATimer : public internal::TimerInterface {
 public:
  explicit CUDATimer(CUDAExecutor *parent)
      : parent_(parent), start_event_(nullptr), stop_event_(nullptr) {}

  // Note: teardown is explicitly handled in this API by a call to
  // StreamExecutor::DeallocateTimer(), which invokes Destroy().
  ~CUDATimer() override {}

  // Allocates the platform-specific pieces of the timer, called as part of
  // StreamExecutor::AllocateTimer().
  bool Init();

  // Deallocates the platform-specific pieces of the timer, called as part of
  // StreamExecutor::DeallocateTimer().
  void Destroy();

  // Records the "timer start" event at the current point in the stream.
  bool Start(CUDAStream *stream);

  // Records the "timer stop" event at the current point in the stream.
  bool Stop(CUDAStream *stream);

  // Returns the elapsed time, in milliseconds, between the start and stop
  // events.
  float GetElapsedMilliseconds() const;

  // See perftools::gputools::Timer::Microseconds().
  // TODO(leary) make this into an error code interface...
  uint64 Microseconds() const override {
    return GetElapsedMilliseconds() * 1e3;
  }

  // See perftools::GPUTools::Timer::Nanoseconds().
  uint64 Nanoseconds() const override { return GetElapsedMilliseconds() * 1e6; }

 private:
  CUDAExecutor *parent_;
  CUevent start_event_;  // Event recorded to indicate the "start" timestamp
                         // executing in a stream.
  CUevent stop_event_;   // Event recorded to indicate the "stop" timestamp
                         // executing in a stream.
};

}  // namespace cuda
}  // namespace gputools
}  // namespace perftools

#endif  // TENSORFLOW_STREAM_EXECUTOR_CUDA_CUDA_TIMER_H_