aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/core/platform/tracing.cc
blob: a4cb92dee459085d1067e0fe9b451feb8ec5ae17 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#include "tensorflow/core/platform/tracing.h"

#include <atomic>
#include <map>
#include <string>
#include "tensorflow/core/framework/step_stats.pb.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/platform/logging.h"

namespace tensorflow {

// Constructs a collector that records into the StepStats pointed to by `ss`.
// The pointer is only stored in step_stats_ here; it is not dereferenced or
// copied by the constructor.
// NOTE(review): ownership of `ss` is not visible from this file; presumably
// the caller keeps it alive for the collector's lifetime -- confirm.
StepStatsCollector::StepStatsCollector(StepStats* ss) : step_stats_(ss) {}

void StepStatsCollector::Save(const string& device, NodeExecStats* nt) {
  VLOG(1) << "Save dev " << device << " nt " << nt;
  {
    mutex_lock l(mu_);
    DeviceStepStats* dss = nullptr;
    // Slow linear scan, but it should only be called
    // by a Worker in a context with < ~10 devices.
    // TODO(tucker): consider adding a std::unordered_map.
    for (auto& ds : *step_stats_->mutable_dev_stats()) {
      if (ds.device() == device) {
        dss = &ds;
        break;
      }
    }
    if (dss == nullptr) {
      dss = step_stats_->add_dev_stats();
      dss->set_device(device);
    }
    nt->Swap(dss->add_node_stats());
  }
  delete nt;
}

// Exchanges the contents of `*ss` with the StepStats this collector points
// at (step_stats_), while holding mu_.
// CHECK-fails if step_stats_ is null. `ss` is dereferenced without a null
// check, so callers must pass a valid pointer.
void StepStatsCollector::Swap(StepStats* ss) {
  mutex_lock l(mu_);
  CHECK(step_stats_);
  ss->Swap(step_stats_);
}

namespace port {

// Definitions of Tracing's static data members.

// One int32 slot per event category (array length kEventCategoryMax). There
// is no explicit initializer, so as a static-storage object it starts out
// zero-initialized. Nothing in this file writes to it.
int32 Tracing::category_id_[kEventCategoryMax];
// Currently active event bit mask. Starts at 0 and is overwritten by
// ParseEventMask() when parsing succeeds.
uint64 Tracing::event_mask_ = 0;
// Maps an event name to its id; ParseEventMask() uses the id as a bit
// position. The map is heap-allocated and is not deleted anywhere in this
// file.
// NOTE(review): presumably leaked on purpose to avoid static-destruction
// order problems, and populated elsewhere -- confirm.
std::map<string, int32>* Tracing::name_map_ = new std::map<string, int32>;

// Returns a human-readable, statically allocated name for `category`.
//
// This needs to be kept in sync with the EventCategory enumeration: the
// switch has no default label, so any value that is not listed below yields
// "Unknown".
const char* Tracing::EventCategoryString(EventCategory category) {
  const char* label = "Unknown";
  switch (category) {
    case EventCategory::kScheduleClosure:
      label = "ScheduleClosure";
      break;
    case EventCategory::kRunClosure:
      label = "RunClosure";
      break;
    case EventCategory::kCompute:
      label = "Compute";
      break;
    case EventCategory::kEventCategoryMax:
      label = "EventCategoryMax";
      break;
  }
  return label;
}

// This function allows the user to specify arbitrary subsets of the
// supported Threadscape events and activities.
bool Tracing::ParseEventMask(const char* flagname, const string& value) {
  VLOG(1) << flagname << " set to " << value;
  int64 new_mask = 0;
  std::vector<string> events =
      str_util::Split(value, ',', str_util::SkipEmpty());
  for (string name : events) {
    bool clear = false;
    int64 mask = 0;
    if (name[0] == '!') {
      // invert the sense of the flag
      clear = true;
      name = name.substr(1);
    }
    if (name == "ALL") {
      mask = ~0;
    } else {
      auto it = name_map_->find(name);
      int32 id;
      if (it == name_map_->end()) {
        id = -1;
      } else {
        id = it->second;
      }
      if (id < 0) {
        LOG(ERROR) << "Can't parse event mask name " << name;
        return false;
      }
      mask = 1 << id;
    }
    if (clear) {
      new_mask &= ~mask;
    } else {
      new_mask |= mask;
    }
  }
  // parsing was successful; set the permanent event mask
  event_mask_ = new_mask;
  return true;
}

// File-local pointer to the currently registered tracing engine. It is
// written by Tracing::RegisterEngine() with release ordering and read by
// engine() with acquire ordering. It has no explicit initializer; as a
// static-storage object it is zero-initialized, i.e. null until an engine is
// registered.
static std::atomic<Tracing::Engine*> tracing_engine;

// Installs `e` as the engine returned by engine(), replacing any previously
// stored pointer. The store uses release ordering so that it pairs with the
// acquire load in engine(). The previous pointer is simply overwritten; it
// is not deleted here.
void Tracing::RegisterEngine(Engine* e) {
  tracing_engine.store(e, std::memory_order_release);
}

// Returns the pointer most recently stored by Tracing::RegisterEngine()
// (acquire load). The result may be null; the callers in this file check for
// null before using it.
static Tracing::Engine* engine() {
  return tracing_engine.load(std::memory_order_acquire);
}

// Out-of-line definitions of the destructors of the engine interface types.
// All three bodies are empty.
// NOTE(review): presumably these are declared virtual in tracing.h and are
// defined here to give the classes a single home translation unit --
// confirm against the header.
Tracing::Engine::~Engine() {}
Tracing::Engine::Annotation::~Annotation() {}
Tracing::Engine::Tracer::~Tracer() {}

// Pushes an annotation called `name` on the registered tracing engine and
// keeps the returned object in annotation_. When no engine is registered the
// constructor does nothing and annotation_ is left as it was
// default-initialized.
Tracing::ScopedAnnotation::ScopedAnnotation(StringPiece name) {
  Tracing::Engine* const active = engine();
  if (active == nullptr) {
    return;  // Tracing is not enabled.
  }
  annotation_.reset(active->PushAnnotation(name));
}

// Starts a trace called `name` on the registered tracing engine and keeps
// the returned object in tracer_. When no engine is registered the
// constructor does nothing and tracer_ is left as it was
// default-initialized.
Tracing::TraceMe::TraceMe(StringPiece name) {
  Tracing::Engine* const active = engine();
  if (active == nullptr) {
    return;  // Tracing is not enabled.
  }
  tracer_.reset(active->StartTracing(name));
}

}  // namespace port
}  // namespace tensorflow