aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/stream_executor/cuda/cuda_timer.cc
blob: ad5e13ab6b7d07a9dc791741ba3daa23b24585d9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#include "tensorflow/stream_executor/cuda/cuda_timer.h"

#include "tensorflow/stream_executor/cuda/cuda_driver.h"
#include "tensorflow/stream_executor/cuda/cuda_gpu_executor.h"
#include "tensorflow/stream_executor/cuda/cuda_stream.h"
#include "tensorflow/stream_executor/lib/status.h"

namespace perftools {
namespace gputools {
namespace cuda {

bool CUDATimer::Init() {
  CHECK(start_event_ == nullptr && stop_event_ == nullptr);
  CUcontext context = parent_->cuda_context();
  if (!CUDADriver::CreateEvent(context, &start_event_,
                               CUDADriver::EventFlags::kDefault)
           .ok()) {
    return false;
  }

  if (!CUDADriver::CreateEvent(context, &stop_event_,
                               CUDADriver::EventFlags::kDefault)
           .ok()) {
    port::Status status = CUDADriver::DestroyEvent(context, &start_event_);
    if (!status.ok()) {
      LOG(ERROR) << status;
    }
    return false;
  }

  CHECK(start_event_ != nullptr && stop_event_ != nullptr);
  return true;
}

void CUDATimer::Destroy() {
  CUcontext context = parent_->cuda_context();
  port::Status status = CUDADriver::DestroyEvent(context, &start_event_);
  if (!status.ok()) {
    LOG(ERROR) << status;
  }

  status = CUDADriver::DestroyEvent(context, &stop_event_);
  if (!status.ok()) {
    LOG(ERROR) << status;
  }
}

float CUDATimer::GetElapsedMilliseconds() const {
  CHECK(start_event_ != nullptr && stop_event_ != nullptr);
  // TODO(leary) provide a way to query timer resolution?
  // CUDA docs say a resolution of about 0.5us
  float elapsed_milliseconds = NAN;
  (void)CUDADriver::GetEventElapsedTime(parent_->cuda_context(),
                                        &elapsed_milliseconds, start_event_,
                                        stop_event_);
  return elapsed_milliseconds;
}

// Records the timer's start event on `stream`.
//
// `stream` is dereferenced unconditionally and so must not be null. Returns
// true if the driver reports that the event was recorded successfully.
bool CUDATimer::Start(CUDAStream *stream) {
  CUcontext context = parent_->cuda_context();
  const auto record_status =
      CUDADriver::RecordEvent(context, start_event_, stream->cuda_stream());
  return record_status.ok();
}

// Records the timer's stop event on `stream`.
//
// `stream` is dereferenced unconditionally and so must not be null. Returns
// true if the driver reports that the event was recorded successfully.
bool CUDATimer::Stop(CUDAStream *stream) {
  CUcontext context = parent_->cuda_context();
  const auto record_status =
      CUDADriver::RecordEvent(context, stop_event_, stream->cuda_stream());
  return record_status.ok();
}

}  // namespace cuda
}  // namespace gputools
}  // namespace perftools