diff options
-rw-r--r-- | tensorflow/core/profiler/internal/tfprof_stats.cc | 31 | ||||
-rw-r--r-- | tensorflow/core/profiler/internal/tfprof_stats.h | 3 |
2 files changed, 33 insertions, 1 deletions
diff --git a/tensorflow/core/profiler/internal/tfprof_stats.cc b/tensorflow/core/profiler/internal/tfprof_stats.cc
index b84272ae72..6e11c3f121 100644
--- a/tensorflow/core/profiler/internal/tfprof_stats.cc
+++ b/tensorflow/core/profiler/internal/tfprof_stats.cc
@@ -48,6 +48,7 @@ TFStats::TFStats(std::unique_ptr<GraphDef> graph, std::unique_ptr<OpLogProto> op_log, std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader) : has_code_traces_(false), + miss_gpu_stream_(false), ckpt_reader_(std::move(ckpt_reader)) { CHECK(graph) << "Must at least have GraphDef";
@@ -70,7 +71,9 @@ TFStats::TFStats(std::unique_ptr<GraphDef> graph, TFStats::TFStats(const string& filename, std::unique_ptr<checkpoint::CheckpointReader> ckpt_reader) - : has_code_traces_(false), ckpt_reader_(std::move(ckpt_reader)) { + : has_code_traces_(false), + miss_gpu_stream_(false), + ckpt_reader_(std::move(ckpt_reader)) { string str; Status s = ReadFileToString(Env::Default(), filename, &str); if (!s.ok()) {
@@ -258,7 +261,17 @@ void TFStats::AddRunMeta(int64 step, std::unique_ptr<RunMetadata> run_meta) { } steps_.insert(step); + bool has_gpu_scheduling = false; + bool has_gpu_stream = false; + for (const auto& dev_stat : run_meta->step_stats().dev_stats()) { + string dev = str_util::Lowercase(dev_stat.device()); + if (IsPlacedOnAccelerator(dev)) { + has_gpu_scheduling = true; + if (CountAsAcceleratorTime(dev)) { + has_gpu_stream = true; + } + } for (const NodeExecStats& node_stat : dev_stat.node_stats()) { string name = node_stat.node_name(); // Sometimes the node_name is suffixed with unnecessary information.
@@ -280,6 +293,21 @@ void TFStats::AddRunMeta(int64 step, std::unique_ptr<RunMetadata> run_meta) { } } } + + if (has_gpu_scheduling && !has_gpu_stream) { + miss_gpu_stream_ = true; + } +} + +void TFStats::MaybeReportMissingTrace() const { + if (miss_gpu_stream_) { + fprintf(stderr, + "\n\nFound accelerator operation but misses accelerator " + "stream stats!\n\n" + "It's likely a gpu tracing issue rather than tf-profiler issue.\n" + "If you found your operation missing accelerator time, " + "consider filing a bug to xprof-dev@!\n\n"); + } } void TFStats::SerializeToString(string* content) {
@@ -312,6 +340,7 @@ void TFStats::WriteProfile(const string& filename) { } bool TFStats::Validate(const Options& opts) const { + MaybeReportMissingTrace(); if (opts.step >= 0 && steps_.find(opts.step) == steps_.end()) { fprintf(stderr, "Options -step=%lld not found.\nAvailable steps: ", opts.step);
diff --git a/tensorflow/core/profiler/internal/tfprof_stats.h b/tensorflow/core/profiler/internal/tfprof_stats.h
index 46f9326c55..621285a7e9 100644
--- a/tensorflow/core/profiler/internal/tfprof_stats.h
+++ b/tensorflow/core/profiler/internal/tfprof_stats.h
@@ -98,11 +98,14 @@ class TFStats { // For test purpose only. void AddNodeForTest(int64 step, std::unique_ptr<TFGraphNode> node); + void MaybeReportMissingTrace() const; + private: bool Validate(const Options& opts) const; std::set<int64> steps_; bool has_code_traces_; + bool miss_gpu_stream_; std::unique_ptr<TFScope> scope_view_; std::unique_ptr<TFGraph> graph_view_; std::unique_ptr<TFCode> code_view_; |