about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
authorGravatar Justin Lebar <jlebar@google.com>2017-07-19 15:07:08 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-07-19 15:16:56 -0700
commit 4c849137952b649785d8a8ed591fbb77b1f49498 (patch)
tree 66d6c5489f85fbbfe23951ccc6594a740b2e3f0e
parent 9cc871e81c04ed11829c3364546b4500742140eb (diff)
Split HLO profile display logic out of hlo_execution_profile.cc, moving
it into human_readable_profile_builder.cc. PiperOrigin-RevId: 162541782
-rw-r--r-- tensorflow/compiler/xla/service/BUILD 13
-rw-r--r-- tensorflow/compiler/xla/service/hlo_execution_profile.cc 102
-rw-r--r-- tensorflow/compiler/xla/service/human_readable_profile_builder.cc 96
-rw-r--r-- tensorflow/compiler/xla/service/human_readable_profile_builder.h 82
4 files changed, 202 insertions, 91 deletions
diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD
index ada130aa84..9c94091412 100644
--- a/tensorflow/compiler/xla/service/BUILD
+++ b/tensorflow/compiler/xla/service/BUILD
@@ -1084,6 +1084,18 @@ cc_library(
)
cc_library(
+ name = "human_readable_profile_builder",
+ srcs = ["human_readable_profile_builder.cc"],
+ hdrs = ["human_readable_profile_builder.h"],
+ deps = [
+ "//tensorflow/compiler/xla:metric_table_report",
+ "//tensorflow/compiler/xla:types",
+ "//tensorflow/compiler/xla:util",
+ "//tensorflow/core:lib",
+ ],
+)
+
+cc_library(
name = "generic_transfer_manager",
srcs = ["generic_transfer_manager.cc"],
hdrs = ["generic_transfer_manager.h"],
@@ -1211,6 +1223,7 @@ cc_library(
deps = [
":hlo",
":hlo_cost_analysis",
+ ":human_readable_profile_builder",
"//tensorflow/compiler/xla:metric_table_report",
"//tensorflow/compiler/xla:types",
"//tensorflow/compiler/xla:util",
diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
index 9e25f1aceb..7a83a92404 100644
--- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc
+++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
@@ -19,14 +19,11 @@ limitations under the License.
#include <utility>
#include <vector>
-#include "tensorflow/compiler/xla/metric_table_report.h"
#include "tensorflow/compiler/xla/service/hlo_instruction.h"
#include "tensorflow/compiler/xla/service/hlo_module.h"
+#include "tensorflow/compiler/xla/service/human_readable_profile_builder.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/util.h"
-#include "tensorflow/core/lib/strings/numbers.h"
-#include "tensorflow/core/lib/strings/strcat.h"
-#include "tensorflow/core/lib/strings/stringprintf.h"
namespace xla {
@@ -55,96 +52,19 @@ string HloExecutionProfile::ToString(
return "";
}
- using Item = std::pair<const HloInstruction*, uint64>;
- std::vector<Item> items;
- for (Item item : hlo_to_cycles_taken_) {
- // Only include the HLOs which are part of the desired computation.
- if (item.first->parent() == &computation) {
- items.push_back(item);
- }
- }
- auto custom_less = [](const Item& lhs, const Item& rhs) {
- return lhs.second > rhs.second;
- };
- std::sort(items.begin(), items.end(), custom_less);
- string result;
- const int64 total_cycles = total_cycles_executed(computation);
- double clock_rate_ghz = device_description.clock_rate_ghz();
- CHECK_GE(clock_rate_ghz, 1e-9);
-
- const auto cycles_to_microseconds = [&](double cycles) {
- return cycles / clock_rate_ghz / 1000.0;
- };
-
- auto append_item = [&](int64 cycles, int64 flops, int64 bytes_accessed,
- const string& name) {
- double nsecs = cycles / clock_rate_ghz;
- string bytes_per_sec;
- string bytes_per_cycle;
- if (cycles <= 0 || bytes_accessed < 0) {
- bytes_per_sec = "<unknown>";
- bytes_per_cycle = "<unknown>";
- } else {
- bytes_per_sec = tensorflow::strings::HumanReadableNumBytes(
- bytes_accessed / (nsecs / 1e9));
- bytes_per_cycle =
- tensorflow::strings::HumanReadableNumBytes(bytes_accessed / cycles);
- }
-
- double cycles_percent = 0;
- if (total_cycles > 0) {
- cycles_percent = cycles / static_cast<double>(total_cycles) * 100;
- }
-
- tensorflow::strings::StrAppend(
- &result,
- tensorflow::strings::Printf(
- "%15lld cycles (%6.2f%%) :: %12.1f usec @ f_nom :: %18s :: %12s/s "
- ":: "
- "%12s/cycle :: "
- "%s",
- cycles, cycles_percent, cycles_to_microseconds(cycles),
- flops <= 0 ? "<none>" : HumanReadableNumFlops(flops, nsecs).c_str(),
- bytes_per_sec.c_str(), bytes_per_cycle.c_str(), name.c_str()));
- };
- tensorflow::strings::StrAppend(
- &result, tensorflow::strings::Printf(
- "HLO execution profile for %s: (%s @ f_nom)\n\t",
- computation.name().c_str(),
- tensorflow::strings::HumanReadableElapsedTime(
- total_cycles / clock_rate_ghz / 1e9)
- .c_str()));
-
- append_item(total_cycles, -1, -1, "[total]");
- for (const auto& item : items) {
+ HumanReadableProfileBuilder builder(computation.name(),
+ total_cycles_executed(computation),
+ device_description.clock_rate_ghz());
+ for (const auto& item : hlo_to_cycles_taken_) {
const HloInstruction* hlo = item.first;
- tensorflow::strings::StrAppend(&result, "\n\t");
- const int64 flops = (hlo == nullptr) ? -1 : cost_analysis.flop_count(*hlo);
- const int64 bytes_accessed =
- (hlo == nullptr) ? -1 : cost_analysis.bytes_accessed(*hlo);
- const string display = (hlo == nullptr) ? "<none>" : hlo->ToString();
- append_item(item.second, flops, bytes_accessed, display);
- }
+ int64 cycles = item.second;
- if (total_cycles <= 0) {
- result += "****** 0 total cycles ******\n";
- } else {
- MetricTableReport table;
- table.SetMetricName("microseconds");
- table.SetEntryName("ops");
- table.SetShowCategoryTable();
- for (const auto& item : items) {
- MetricTableReport::Entry entry;
- entry.text = item.first->ToString();
- entry.short_text = item.first->ToString(/*compact_operands=*/true);
- entry.category_text = item.first->ToCategory();
- entry.metric = cycles_to_microseconds(item.second);
- table.AddEntry(std::move(entry));
- }
- result += table.MakeReport(cycles_to_microseconds(total_cycles));
+ builder.AddOp(/*op_name=*/hlo->ToString(),
+ /*short_name=*/hlo->ToString(/*compact_operands=*/true),
+ hlo->ToCategory(), cycles, cost_analysis.flop_count(*hlo),
+ cost_analysis.bytes_accessed(*hlo));
}
-
- return result;
+ return builder.ToString();
}
} // namespace xla
diff --git a/tensorflow/compiler/xla/service/human_readable_profile_builder.cc b/tensorflow/compiler/xla/service/human_readable_profile_builder.cc
new file mode 100644
index 0000000000..1b9a7a297f
--- /dev/null
+++ b/tensorflow/compiler/xla/service/human_readable_profile_builder.cc
@@ -0,0 +1,96 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/human_readable_profile_builder.h"
+#include "tensorflow/compiler/xla/metric_table_report.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/core/lib/strings/numbers.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+
+namespace xla {
+
+using tensorflow::strings::Appendf;
+using tensorflow::strings::HumanReadableElapsedTime;
+using tensorflow::strings::HumanReadableNumBytes;
+using tensorflow::strings::StrAppend;
+
+string HumanReadableProfileBuilder::ToString() const {
+ string s;
+
+ Appendf(&s, "Execution profile for %s: (%s @ f_nom)\n",
+ computation_name_.c_str(),
+ HumanReadableElapsedTime(CyclesToSeconds(total_cycles_)).c_str());
+
+ auto append_op = [&](const OpInfo& op) {
+ string bytes_per_sec;
+ string bytes_per_cycle;
+ if (op.cycles <= 0 || op.bytes_accessed < 0) {
+ bytes_per_sec = "<unknown>";
+ bytes_per_cycle = "<unknown>";
+ } else {
+ bytes_per_sec =
+ HumanReadableNumBytes(op.bytes_accessed / CyclesToSeconds(op.cycles));
+ bytes_per_cycle = HumanReadableNumBytes(op.bytes_accessed / op.cycles);
+ }
+
+ double cycles_percent = 0;
+ if (total_cycles_ > 0) {
+ cycles_percent = op.cycles / static_cast<double>(total_cycles_) * 100;
+ }
+
+ double nsecs = op.cycles / clock_rate_ghz_;
+ Appendf(&s,
+ "\t%15lld cycles (%6.2f%%) :: %12.1f usec @ f_nom :: %18s "
+ ":: %12s/s :: %12s/cycle :: %s\n",
+ op.cycles, cycles_percent, CyclesToMicroseconds(op.cycles),
+ op.flop_count <= 0
+ ? "<none>"
+ : HumanReadableNumFlops(op.flop_count, nsecs).c_str(),
+ bytes_per_sec.c_str(), bytes_per_cycle.c_str(), op.name.c_str());
+ };
+
+ append_op({"[total]", "[total]", /*category=*/"", total_cycles_, -1, -1});
+
+ // Sort ops in decreasing order of cycles.
+ std::vector<OpInfo> sorted_ops(op_infos_);
+ std::sort(
+ sorted_ops.begin(), sorted_ops.end(),
+ [](const OpInfo& a, const OpInfo& b) { return a.cycles > b.cycles; });
+ for (const auto& op : sorted_ops) {
+ append_op(op);
+ }
+
+ if (total_cycles_ <= 0) {
+ StrAppend(&s, "****** 0 total cycles ******\n");
+ } else {
+ MetricTableReport table;
+ table.SetMetricName("microseconds");
+ table.SetEntryName("ops");
+ table.SetShowCategoryTable();
+ for (const auto& op : sorted_ops) {
+ MetricTableReport::Entry entry;
+ entry.text = op.name;
+ entry.short_text = op.short_name;
+ entry.category_text = op.category;
+ entry.metric = CyclesToMicroseconds(op.cycles);
+ table.AddEntry(std::move(entry));
+ }
+ StrAppend(&s, table.MakeReport(CyclesToMicroseconds(total_cycles_)));
+ }
+ return s;
+}
+
+} // namespace xla
diff --git a/tensorflow/compiler/xla/service/human_readable_profile_builder.h b/tensorflow/compiler/xla/service/human_readable_profile_builder.h
new file mode 100644
index 0000000000..1a69cbf8bf
--- /dev/null
+++ b/tensorflow/compiler/xla/service/human_readable_profile_builder.h
@@ -0,0 +1,82 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HUMAN_READABLE_PROFILE_BUILDER_H_
+#define TENSORFLOW_COMPILER_XLA_SERVICE_HUMAN_READABLE_PROFILE_BUILDER_H_
+
+#include <vector>
+
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/core/lib/core/stringpiece.h"
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/types.h"
+
+namespace xla {
+
+// HumanReadableProfileBuilder helps you create a textual profile of a
+// computation, suitable for consumption by humans.
+class HumanReadableProfileBuilder {
+ public:
+ explicit HumanReadableProfileBuilder(tensorflow::StringPiece computation_name,
+ int64 total_cycles,
+ double clock_rate_ghz)
+ : computation_name_(computation_name.ToString()),
+ total_cycles_(total_cycles),
+ clock_rate_ghz_(clock_rate_ghz) {
+ CHECK_GE(clock_rate_ghz, 1e-9);
+ }
+
+ int64 total_cycles() const { return total_cycles_; }
+
+ // Adds an operation to the profile. If you don't know the number of
+ // floating-point ops or bytes touched by the op, pass -1 for that param.
+ void AddOp(tensorflow::StringPiece op_name,
+ tensorflow::StringPiece short_name,
+ tensorflow::StringPiece category, int64 cycles, int64 flop_count,
+ int64 bytes_accessed) {
+ op_infos_.push_back({op_name.ToString(), short_name.ToString(),
+ category.ToString(), cycles, flop_count,
+ bytes_accessed});
+ }
+
+ // Gets the human-readable profile.
+ string ToString() const;
+
+ private:
+ struct OpInfo {
+ string name;
+ string short_name;
+ string category;
+ int64 cycles;
+ int64 flop_count;
+ int64 bytes_accessed;
+ };
+
+ double CyclesToSeconds(int64 cycles) const {
+ return cycles / clock_rate_ghz_ / 1e9;
+ }
+ double CyclesToMicroseconds(int64 cycles) const {
+ return cycles / clock_rate_ghz_ / 1000.0;
+ }
+
+ string computation_name_;
+ int64 total_cycles_;
+ double clock_rate_ghz_;
+ std::vector<OpInfo> op_infos_;
+};
+
+} // namespace xla
+
+#endif // TENSORFLOW_COMPILER_XLA_SERVICE_HUMAN_READABLE_PROFILE_BUILDER_H_