about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/compiler/xla/service/hlo_execution_profile.cc
diff options
context:
space:
mode:
author: Bjarke Hammersholt Roune <broune@google.com> 2017-01-27 20:27:37 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-01-27 20:45:46 -0800
commit: 6c4077255fa4e6ae4e3e45122035f891ae803246 (patch)
tree: 644d1e75e986d5dbeecafebecb733f21682ebf07 /tensorflow/compiler/xla/service/hlo_execution_profile.cc
parent: c4824086310a284bd41c49e22f11274a224f68ea (diff)
Add sum-across-opcodes report for HLO profiling.
Change: 145863928
Diffstat (limited to 'tensorflow/compiler/xla/service/hlo_execution_profile.cc')
-rw-r--r-- tensorflow/compiler/xla/service/hlo_execution_profile.cc 26
1 files changed, 23 insertions, 3 deletions
diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
index edba55f6cd..0b87b04fc4 100644
--- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc
+++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
@@ -19,6 +19,7 @@ limitations under the License.
#include <utility>
#include <vector>
+#include "tensorflow/compiler/xla/metric_table_report.h"
#include "tensorflow/compiler/xla/service/hlo_instruction.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/util.h"
@@ -54,15 +55,19 @@ string HloExecutionProfile::ToString(
string result;
const int64 total_cycles = total_cycles_executed();
double clock_rate_ghz = device_description.clock_rate_ghz();
- auto append_item = [&result, total_cycles, clock_rate_ghz](
- int64 cycles, int64 flops, const string& name) {
+
+ const auto cycles_to_microseconds = [&](double cycles) {
+ return cycles / clock_rate_ghz / 1000.0;
+ };
+
+ auto append_item = [&](int64 cycles, int64 flops, const string& name) {
double nsecs = cycles / clock_rate_ghz;
tensorflow::strings::StrAppend(
&result,
tensorflow::strings::Printf(
"%15lld cycles (%6.2f%%) :: %12.1f usec @ f_nom :: %18s :: %s",
cycles, cycles / static_cast<double>(total_cycles) * 100,
- nsecs / 1e3,
+ cycles_to_microseconds(cycles),
flops <= 0 ? "<none>" : HumanReadableNumFlops(flops, nsecs).c_str(),
name.c_str()));
};
@@ -81,6 +86,21 @@ string HloExecutionProfile::ToString(
string display = item.first == nullptr ? "<none>" : item.first->ToString();
append_item(item.second, flops, display);
}
+
+ MetricTableReport table;
+ table.SetMetricName("microseconds");
+ table.SetEntryName("ops");
+ table.SetShowCategoryTable();
+ for (const auto& item : items) {
+ MetricTableReport::Entry entry;
+ entry.text = item.first->ToString();
+ entry.short_text = item.first->ToString(/*compact_operands=*/true);
+ entry.category_text = item.first->ToCategory();
+ entry.metric = cycles_to_microseconds(item.second);
+ table.AddEntry(std::move(entry));
+ }
+ result += table.MakeReport(cycles_to_microseconds(total_cycles));
+
return result;
}