about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/compiler/xla/service/hlo_execution_profile.cc
diff options
context:
space:
mode:
author: David Majnemer <majnemer@google.com>, 2017-03-01 17:37:09 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org>, 2017-03-01 17:48:18 -0800
commit: af2c7253bb1f9d135ad9b0c6a271741205ab57fd (patch)
tree: 5bc2c03e568e73b419531679dce6a86d61064a72 /tensorflow/compiler/xla/service/hlo_execution_profile.cc
parent: 5ce1f684d2bb6e9220f68b9541d33342b5452918 (diff)
[XLA] Add support for profiling multiple computations
While we are here, add support for getting the cost analysis for call HLOs.
Change: 148952748
Diffstat (limited to 'tensorflow/compiler/xla/service/hlo_execution_profile.cc')
-rw-r--r-- tensorflow/compiler/xla/service/hlo_execution_profile.cc 35
1 file changed, 26 insertions, 9 deletions
diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
index e2a81a052c..447892c8de 100644
--- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc
+++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
@@ -21,6 +21,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/metric_table_report.h"
#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_module.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/util.h"
#include "tensorflow/core/lib/strings/numbers.h"
@@ -32,6 +33,7 @@ namespace xla {
void HloExecutionProfile::AddProfileResult(const HloInstruction* hlo,
uint64 cycles_taken) {
hlo_to_cycles_taken_[hlo] = cycles_taken;
+ profiled_computations_.insert(hlo->parent());
}
uint64 HloExecutionProfile::GetProfileResult(const HloInstruction& hlo) const {
@@ -43,17 +45,30 @@ uint64 HloExecutionProfile::GetProfileResult(const HloInstruction& hlo) const {
}
string HloExecutionProfile::ToString(
+ const HloComputation& computation,
const DeviceDescription& device_description,
- const HloCostAnalysis& cost_analysis) const {
+ const HloCostAnalysis::ShapeSizeFunction& shape_size) const {
+ HloCostAnalysis cost_analysis(shape_size);
+ tensorflow::Status analysis_status =
+ computation.root_instruction()->Accept(&cost_analysis);
+ if (!analysis_status.ok()) {
+ return "";
+ }
+
using Item = std::pair<const HloInstruction*, uint64>;
- std::vector<Item> items(hlo_to_cycles_taken_.begin(),
- hlo_to_cycles_taken_.end());
+ std::vector<Item> items;
+ for (Item item : hlo_to_cycles_taken_) {
+ // Only include the HLOs which are part of the desired computation.
+ if (item.first->parent() == &computation) {
+ items.push_back(item);
+ }
+ }
auto custom_less = [](const Item& lhs, const Item& rhs) {
return lhs.second > rhs.second;
};
std::sort(items.begin(), items.end(), custom_less);
string result;
- const int64 total_cycles = total_cycles_executed();
+ const int64 total_cycles = total_cycles_executed(computation);
double clock_rate_ghz = device_description.clock_rate_ghz();
const auto cycles_to_microseconds = [&](double cycles) {
@@ -88,11 +103,13 @@ string HloExecutionProfile::ToString(
bytes_per_sec.c_str(), bytes_per_cycle.c_str(), name.c_str()));
};
tensorflow::strings::StrAppend(
- &result,
- tensorflow::strings::Printf("HLO execution profile: (%s @ f_nom)\n\t",
- tensorflow::strings::HumanReadableElapsedTime(
- total_cycles / clock_rate_ghz / 1e9)
- .c_str()));
+ &result, tensorflow::strings::Printf(
+ "HLO execution profile for %s: (%s @ f_nom)\n\t",
+ computation.name().c_str(),
+ tensorflow::strings::HumanReadableElapsedTime(
+ total_cycles / clock_rate_ghz / 1e9)
+ .c_str()));
+
append_item(total_cycles, -1, -1, "[total]");
for (const auto& item : items) {
const HloInstruction* hlo = item.first;