diff options
author     Eric Liu <ioeric@google.com>                    2017-08-24 02:42:39 -0700
committer  TensorFlower Gardener <gardener@tensorflow.org> 2017-08-24 02:46:21 -0700
commit     b23b244cceb8019e025c8f3e35d2393cca3c430b (patch)
tree       688ec296f5490ef2e5d025fcb187f301d8e83b73
parent     410fe2c31009a5ae341241afc76257cc19615f10 (diff)
[tpu:profiler] Support the Op Profile tool in TPU profiler.
o Add an op_profile proto that defines a Profile class which assembles a
hierarchical performance profile based on HLOs in trace_events.
o Dump JSON-formatted op profile proto to the log directory.
PiperOrigin-RevId: 166318667
-rw-r--r--  tensorflow/contrib/tpu/profiler/BUILD                   | 10
-rw-r--r--  tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc  | 42
-rw-r--r--  tensorflow/contrib/tpu/profiler/op_profile.proto        | 53
-rw-r--r--  tensorflow/contrib/tpu/profiler/tpu_profiler.proto      |  6
4 files changed, 104 insertions, 7 deletions
diff --git a/tensorflow/contrib/tpu/profiler/BUILD b/tensorflow/contrib/tpu/profiler/BUILD index e976e1dd05..0c860ad4d7 100644 --- a/tensorflow/contrib/tpu/profiler/BUILD +++ b/tensorflow/contrib/tpu/profiler/BUILD @@ -10,7 +10,7 @@ tf_proto_library_cc( has_services = 1, cc_api_version = 2, cc_grpc_version = 1, - protodeps = tf_additional_all_protos(), + protodeps = [":op_profile_proto"] + tf_additional_all_protos(), visibility = ["//visibility:public"], ) @@ -19,6 +19,7 @@ cc_binary( srcs = ["capture_tpu_profile.cc"], visibility = ["//tensorflow/contrib/tpu/profiler:__subpackages__"], deps = [ + ":op_profile_proto_cc", ":tpu_profiler_proto_cc", ":trace_events_proto_cc", ":trace_events_to_json", @@ -60,3 +61,10 @@ cc_test( "@jsoncpp_git//:jsoncpp", ], ) + +tf_proto_library_cc( + name = "op_profile_proto", + srcs = ["op_profile.proto"], + cc_api_version = 2, + visibility = ["//visibility:public"], +) diff --git a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc index 4412db80e7..a0dc15249f 100644 --- a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc +++ b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include <ctime> #include <vector> +#include "tensorflow/contrib/tpu/profiler/op_profile.pb.h" #include "tensorflow/contrib/tpu/profiler/tpu_profiler.grpc.pb.h" #include "tensorflow/contrib/tpu/profiler/trace_events.pb.h" #include "tensorflow/contrib/tpu/profiler/trace_events_to_json.h" @@ -48,8 +49,11 @@ using ::tensorflow::TPUProfiler; using ::grpc::ClientContext; using ::tensorflow::io::JoinPath; +using ::tensorflow::protobuf::util::JsonOptions; +using ::tensorflow::protobuf::util::MessageToJsonString; constexpr char kProfilePluginDirectory[] = "plugins/profile/"; +constexpr char kJsonOpProfileFileName[] = "op_profile.json"; constexpr char kProtoTraceFileName[] = "trace"; constexpr char kJsonTraceFileName[] = "trace.json.gz"; constexpr char kGraphRunPrefix[] = "tpu_profiler.hlo_graph."; @@ -75,12 +79,15 @@ Status WriteGzippedDataToFile(const string& filename, const string& data) { return Status::OK(); } -// This dumps a rawproto trace and a JSON trace to -// <logdir>/plugins/profile/<run>/. -void DumpTraceToLogDirectory(StringPiece logdir, StringPiece run, - const string& encoded_trace) { +// Dumps profile data to <logdir>/plugins/profile/<run>/. 
+inline string CreateProfileRunDirectory(const string& logdir, + const string& run) { string run_dir = JoinPath(logdir, kProfilePluginDirectory, run); TF_CHECK_OK(Env::Default()->RecursivelyCreateDir(run_dir)); + return run_dir; +} + +void DumpTraceToLogDirectory(StringPiece run_dir, const string& encoded_trace) { string proto_path = JoinPath(run_dir, kProtoTraceFileName); TF_CHECK_OK(WriteStringToFile(Env::Default(), proto_path, encoded_trace)); LOG(INFO) << "Dumped raw-proto trace data to " << proto_path; @@ -94,6 +101,22 @@ void DumpTraceToLogDirectory(StringPiece logdir, StringPiece run, std::cout << "Dumped JSON trace data to " << json_path << std::endl; } +void DumpOpProfileToLogDirectory(StringPiece run_dir, + const tpu::op_profile::Profile& profile) { + string path = JoinPath(run_dir, kJsonOpProfileFileName); + string json; + JsonOptions options; + options.always_print_primitive_fields = true; + auto status = MessageToJsonString(profile, &json, options); + if (!status.ok()) { + std::cerr << "Failed to convert op profile to json. Skipping... " + << status.error_message() << std::endl; + return; + } + TF_CHECK_OK(WriteStringToFile(Env::Default(), path, json)); + std::cout << "Dumped json op profile data to " << path << std::endl; +} + ProfileResponse Profile(const string& service_addr, int duration_ms) { ProfileRequest request; request.set_duration_ms(duration_ms); @@ -153,14 +176,15 @@ int main(int argc, char** argv) { tensorflow::tpu::Profile(FLAGS_service_addr, duration_ms); // Use the current timestamp as the run name. tensorflow::string run = tensorflow::tpu::GetCurrentTimeStampAsString(); + tensorflow::string run_dir = + tensorflow::tpu::CreateProfileRunDirectory(FLAGS_logdir, run); // Ignore computation_graph for now. if (response.encoded_trace().empty()) { std::cout << "No trace event is collected during the " << duration_ms << "ms interval." 
<< std::endl; } else { LOG(INFO) << "Converting trace events to TraceViewer JSON."; - tensorflow::tpu::DumpTraceToLogDirectory(FLAGS_logdir, run, - response.encoded_trace()); + tensorflow::tpu::DumpTraceToLogDirectory(run_dir, response.encoded_trace()); } int num_graphs = response.computation_graph_size(); if (num_graphs > 0) { @@ -175,6 +199,12 @@ int main(int argc, char** argv) { tensorflow::tpu::DumpGraph( FLAGS_logdir, run, response.computation_graph(0).SerializeAsString()); } + if (response.has_op_profile() && + (response.op_profile().has_by_program_structure() || + response.op_profile().has_by_category())) { + tensorflow::tpu::DumpOpProfileToLogDirectory(run_dir, + response.op_profile()); + } // Print this at the end so that it's not buried in irrelevant LOG messages. std::cout << "NOTE: using the trace duration " << duration_ms << "ms." << std::endl diff --git a/tensorflow/contrib/tpu/profiler/op_profile.proto b/tensorflow/contrib/tpu/profiler/op_profile.proto new file mode 100644 index 0000000000..6911b649a0 --- /dev/null +++ b/tensorflow/contrib/tpu/profiler/op_profile.proto @@ -0,0 +1,53 @@ +syntax = "proto3"; + +package tensorflow.tpu.op_profile; + +// Profile is the top-level data that summarizes a program. +message Profile { + // Root of a profile broken down by instruction category. + Node by_category = 1; + // Root of a profile broken down by program structure. + Node by_program_structure = 2; +} + +// An entry in the profile tree. (An instruction, or set of instructions). +message Node { + string name = 1; // Semantics depend on contents. + Metrics metrics = 2; // May be omitted e.g. for fused instructions. + repeated Node children = 3; + + // Details about what this node represents. + oneof contents { + InstructionCategory category = 4; + XLAInstruction xla = 5; + } + // A category of XLA instructions. + // name is a descriptive string, like "data formatting". + message InstructionCategory { + } + // A single XLA instruction. 
+ // name is the unique instruction id, like "%multiply.5". + message XLAInstruction { + string op = 1; // Opcode like %multiply + string expression = 2; // %multiply = [shape]multiply(operand1, operand2) + string provenance = 3; // Typically the TensorFlow operation name. + string category = 4; + } +} + +// Measurements of an operation (or aggregated set of operations). +// Metrics are always "total" rather than "self". +message Metrics { + // Core-time taken by this operation, as a fraction of all operations. + double time = 1; + // Floating point computations performed by this operation, as a fraction of + // peak core FLOPS * program time. This representation has useful properties: + // - it is proportional to the number of floating point operations performed + // - utilization is flops/time + // - wasted potential flops is proportional to time - flops + // - it does not reveal the peak core FLOPS of the hardware + double flops = 2; + + double raw_time = 11; // Elapsed core-time in picoseconds. + double raw_flops = 12; // Total floating-point operations performed. +} diff --git a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto index 21e355a519..d0a27f1a3d 100644 --- a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto +++ b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto @@ -2,6 +2,7 @@ syntax = "proto3"; package tensorflow; import "tensorflow/core/framework/graph.proto"; +import "tensorflow/contrib/tpu/profiler/op_profile.proto"; // The TPUProfiler service retrieves performance information about // the programs running on connected TPUs over a period of time. @@ -34,4 +35,9 @@ message ProfileResponse { // during the profiling period. Describes the devices and resources that // 'trace_events' refers to. bytes encoded_trace = 3; + + // Assembles a hierarchical performance profile based on HLOs in trace events. + // If the trace covers multiple programs, the longest-running one is analyzed. 
+ // See op_profile.proto for the detailed semantics of the returned profile. + tpu.op_profile.Profile op_profile = 4; }