author    Eric Liu <ioeric@google.com>  2017-08-24 02:42:39 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>  2017-08-24 02:46:21 -0700
commit    b23b244cceb8019e025c8f3e35d2393cca3c430b (patch)
tree      688ec296f5490ef2e5d025fcb187f301d8e83b73
parent    410fe2c31009a5ae341241afc76257cc19615f10 (diff)
[tpu:profiler] Support the Op Profile tool in TPU profiler.
o Add an op_profile proto that defines a Profile message which assembles a
  hierarchical performance profile based on HLOs in trace_events.
o Dump the JSON-formatted op profile proto to the log directory.

PiperOrigin-RevId: 166318667
-rw-r--r--  tensorflow/contrib/tpu/profiler/BUILD                  | 10
-rw-r--r--  tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc | 42
-rw-r--r--  tensorflow/contrib/tpu/profiler/op_profile.proto       | 53
-rw-r--r--  tensorflow/contrib/tpu/profiler/tpu_profiler.proto     |  6
4 files changed, 104 insertions(+), 7 deletions(-)
diff --git a/tensorflow/contrib/tpu/profiler/BUILD b/tensorflow/contrib/tpu/profiler/BUILD
index e976e1dd05..0c860ad4d7 100644
--- a/tensorflow/contrib/tpu/profiler/BUILD
+++ b/tensorflow/contrib/tpu/profiler/BUILD
@@ -10,7 +10,7 @@ tf_proto_library_cc(
has_services = 1,
cc_api_version = 2,
cc_grpc_version = 1,
- protodeps = tf_additional_all_protos(),
+ protodeps = [":op_profile_proto"] + tf_additional_all_protos(),
visibility = ["//visibility:public"],
)
@@ -19,6 +19,7 @@ cc_binary(
srcs = ["capture_tpu_profile.cc"],
visibility = ["//tensorflow/contrib/tpu/profiler:__subpackages__"],
deps = [
+ ":op_profile_proto_cc",
":tpu_profiler_proto_cc",
":trace_events_proto_cc",
":trace_events_to_json",
@@ -60,3 +61,10 @@ cc_test(
"@jsoncpp_git//:jsoncpp",
],
)
+
+tf_proto_library_cc(
+ name = "op_profile_proto",
+ srcs = ["op_profile.proto"],
+ cc_api_version = 2,
+ visibility = ["//visibility:public"],
+)
diff --git a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc
index 4412db80e7..a0dc15249f 100644
--- a/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc
+++ b/tensorflow/contrib/tpu/profiler/capture_tpu_profile.cc
@@ -24,6 +24,7 @@ limitations under the License.
#include <ctime>
#include <vector>
+#include "tensorflow/contrib/tpu/profiler/op_profile.pb.h"
#include "tensorflow/contrib/tpu/profiler/tpu_profiler.grpc.pb.h"
#include "tensorflow/contrib/tpu/profiler/trace_events.pb.h"
#include "tensorflow/contrib/tpu/profiler/trace_events_to_json.h"
@@ -48,8 +49,11 @@ using ::tensorflow::TPUProfiler;
using ::grpc::ClientContext;
using ::tensorflow::io::JoinPath;
+using ::tensorflow::protobuf::util::JsonOptions;
+using ::tensorflow::protobuf::util::MessageToJsonString;
constexpr char kProfilePluginDirectory[] = "plugins/profile/";
+constexpr char kJsonOpProfileFileName[] = "op_profile.json";
constexpr char kProtoTraceFileName[] = "trace";
constexpr char kJsonTraceFileName[] = "trace.json.gz";
constexpr char kGraphRunPrefix[] = "tpu_profiler.hlo_graph.";
@@ -75,12 +79,15 @@ Status WriteGzippedDataToFile(const string& filename, const string& data) {
return Status::OK();
}
-// This dumps a rawproto trace and a JSON trace to
-// <logdir>/plugins/profile/<run>/.
-void DumpTraceToLogDirectory(StringPiece logdir, StringPiece run,
- const string& encoded_trace) {
+// Dumps profile data to <logdir>/plugins/profile/<run>/.
+inline string CreateProfileRunDirectory(const string& logdir,
+ const string& run) {
string run_dir = JoinPath(logdir, kProfilePluginDirectory, run);
TF_CHECK_OK(Env::Default()->RecursivelyCreateDir(run_dir));
+ return run_dir;
+}
+
+void DumpTraceToLogDirectory(StringPiece run_dir, const string& encoded_trace) {
string proto_path = JoinPath(run_dir, kProtoTraceFileName);
TF_CHECK_OK(WriteStringToFile(Env::Default(), proto_path, encoded_trace));
LOG(INFO) << "Dumped raw-proto trace data to " << proto_path;
@@ -94,6 +101,22 @@ void DumpTraceToLogDirectory(StringPiece logdir, StringPiece run,
std::cout << "Dumped JSON trace data to " << json_path << std::endl;
}
+void DumpOpProfileToLogDirectory(StringPiece run_dir,
+ const tpu::op_profile::Profile& profile) {
+ string path = JoinPath(run_dir, kJsonOpProfileFileName);
+ string json;
+ JsonOptions options;
+ options.always_print_primitive_fields = true;
+ auto status = MessageToJsonString(profile, &json, options);
+ if (!status.ok()) {
+ std::cerr << "Failed to convert op profile to json. Skipping... "
+ << status.error_message() << std::endl;
+ return;
+ }
+ TF_CHECK_OK(WriteStringToFile(Env::Default(), path, json));
+ std::cout << "Dumped json op profile data to " << path << std::endl;
+}
+
ProfileResponse Profile(const string& service_addr, int duration_ms) {
ProfileRequest request;
request.set_duration_ms(duration_ms);
@@ -153,14 +176,15 @@ int main(int argc, char** argv) {
tensorflow::tpu::Profile(FLAGS_service_addr, duration_ms);
// Use the current timestamp as the run name.
tensorflow::string run = tensorflow::tpu::GetCurrentTimeStampAsString();
+ tensorflow::string run_dir =
+ tensorflow::tpu::CreateProfileRunDirectory(FLAGS_logdir, run);
// Ignore computation_graph for now.
if (response.encoded_trace().empty()) {
std::cout << "No trace event is collected during the " << duration_ms
<< "ms interval." << std::endl;
} else {
LOG(INFO) << "Converting trace events to TraceViewer JSON.";
- tensorflow::tpu::DumpTraceToLogDirectory(FLAGS_logdir, run,
- response.encoded_trace());
+ tensorflow::tpu::DumpTraceToLogDirectory(run_dir, response.encoded_trace());
}
int num_graphs = response.computation_graph_size();
if (num_graphs > 0) {
@@ -175,6 +199,12 @@ int main(int argc, char** argv) {
tensorflow::tpu::DumpGraph(
FLAGS_logdir, run, response.computation_graph(0).SerializeAsString());
}
+ if (response.has_op_profile() &&
+ (response.op_profile().has_by_program_structure() ||
+ response.op_profile().has_by_category())) {
+ tensorflow::tpu::DumpOpProfileToLogDirectory(run_dir,
+ response.op_profile());
+ }
// Print this at the end so that it's not buried in irrelevant LOG messages.
std::cout
<< "NOTE: using the trace duration " << duration_ms << "ms." << std::endl
diff --git a/tensorflow/contrib/tpu/profiler/op_profile.proto b/tensorflow/contrib/tpu/profiler/op_profile.proto
new file mode 100644
index 0000000000..6911b649a0
--- /dev/null
+++ b/tensorflow/contrib/tpu/profiler/op_profile.proto
@@ -0,0 +1,53 @@
+syntax = "proto3";
+
+package tensorflow.tpu.op_profile;
+
+// Profile is the top-level data that summarizes a program.
+message Profile {
+ // Root of a profile broken down by instruction category.
+ Node by_category = 1;
+ // Root of a profile broken down by program structure.
+ Node by_program_structure = 2;
+}
+
+// An entry in the profile tree. (An instruction, or set of instructions).
+message Node {
+ string name = 1; // Semantics depend on contents.
+ Metrics metrics = 2; // May be omitted e.g. for fused instructions.
+ repeated Node children = 3;
+
+ // Details about what this node represents.
+ oneof contents {
+ InstructionCategory category = 4;
+ XLAInstruction xla = 5;
+ }
+ // A category of XLA instructions.
+ // name is a descriptive string, like "data formatting".
+ message InstructionCategory {
+ }
+ // A single XLA instruction.
+ // name is the unique instruction id, like "%multiply.5".
+ message XLAInstruction {
+ string op = 1; // Opcode like %multiply
+ string expression = 2; // %multiply = [shape]multiply(operand1, operand2)
+ string provenance = 3; // Typically the TensorFlow operation name.
+ string category = 4;
+ }
+}
+
+// Measurements of an operation (or aggregated set of operations).
+// Metrics are always "total" rather than "self".
+message Metrics {
+ // Core-time taken by this operation, as a fraction of all operations.
+ double time = 1;
+ // Floating point computations performed by this operation, as a fraction of
+ // peak core FLOPS * program time. This representation has useful properties:
+ // - it is proportional to the number of floating point operations performed
+ // - utilization is flops/time
+ // - wasted potential flops is proportional to time - flops
+ // - it does not reveal the peak core FLOPS of the hardware
+ double flops = 2;
+
+ double raw_time = 11; // Elapsed core-time in picoseconds.
+ double raw_flops = 12; // Total floating-point operations performed.
+}
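
To make the Metrics semantics concrete: time and flops are both fractions of peak core FLOPS * program time, so a node's FLOPS utilization is simply flops / time. The sketch below is illustrative only and is not part of this change; PrintUtilization and PrintProfile are hypothetical names.

// Illustrative sketch (not part of this change): prints per-node FLOPS
// utilization, defined above as metrics.flops / metrics.time.
#include <iostream>
#include <string>

#include "tensorflow/contrib/tpu/profiler/op_profile.pb.h"

namespace op_profile = tensorflow::tpu::op_profile;

// Recursively prints "name: utilization%" for every node with timing data.
void PrintUtilization(const op_profile::Node& node, int depth) {
  if (node.has_metrics() && node.metrics().time() > 0) {
    // Both fields are fractions of (peak FLOPS * program time), so their
    // ratio is the fraction of peak FLOPS achieved by this node.
    const double utilization = node.metrics().flops() / node.metrics().time();
    std::cout << std::string(2 * depth, ' ') << node.name() << ": "
              << 100.0 * utilization << "% of peak FLOPS" << std::endl;
  }
  for (const op_profile::Node& child : node.children()) {
    PrintUtilization(child, depth + 1);
  }
}

// Reports the per-category breakdown if the profiler returned one.
void PrintProfile(const op_profile::Profile& profile) {
  if (profile.has_by_category()) {
    PrintUtilization(profile.by_category(), /*depth=*/0);
  }
}
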
diff --git a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto
index 21e355a519..d0a27f1a3d 100644
--- a/tensorflow/contrib/tpu/profiler/tpu_profiler.proto
+++ b/tensorflow/contrib/tpu/profiler/tpu_profiler.proto
@@ -2,6 +2,7 @@ syntax = "proto3";
package tensorflow;
import "tensorflow/core/framework/graph.proto";
+import "tensorflow/contrib/tpu/profiler/op_profile.proto";
// The TPUProfiler service retrieves performance information about
// the programs running on connected TPUs over a period of time.
@@ -34,4 +35,9 @@ message ProfileResponse {
// during the profiling period. Describes the devices and resources that
// 'trace_events' refers to.
bytes encoded_trace = 3;
+
+ // Assembles a hierarchical performance profile based on HLOs in trace events.
+ // If the trace covers multiple programs, the longest-running one is analyzed.
+ // See op_profile.proto for the detailed semantics of the returned profile.
+ tpu.op_profile.Profile op_profile = 4;
}
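
For completeness, a client other than capture_tpu_profile.cc would consume the new field roughly as follows. This is a sketch under assumptions: the RPC is assumed to be named Profile as in the capture tool above, the endpoint and duration are placeholders, and TPUProfiler::NewStub is the stub the gRPC code generator emits for this service.

// Illustrative sketch (not part of this change): minimal client for the new
// op_profile field. The endpoint and duration are placeholders.
#include <iostream>
#include <memory>

#include "grpc++/grpc++.h"
#include "tensorflow/contrib/tpu/profiler/tpu_profiler.grpc.pb.h"

int main() {
  auto channel = ::grpc::CreateChannel("localhost:8466",  // placeholder target
                                       ::grpc::InsecureChannelCredentials());
  std::unique_ptr<tensorflow::TPUProfiler::Stub> stub =
      tensorflow::TPUProfiler::NewStub(channel);

  tensorflow::ProfileRequest request;
  request.set_duration_ms(2000);  // Trace for two seconds.
  tensorflow::ProfileResponse response;
  ::grpc::ClientContext context;
  ::grpc::Status status = stub->Profile(&context, request, &response);
  if (!status.ok()) {
    std::cerr << "Profile RPC failed: " << status.error_message() << std::endl;
    return 1;
  }
  // op_profile is only filled in when HLO activity was traced; see the field
  // comment above for the single-program semantics.
  if (response.has_op_profile()) {
    std::cout << "Received an op profile for the longest-running program."
              << std::endl;
  }
  return 0;
}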