aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/compiler/xla/service/hlo_execution_profile.cc
diff options
context:
space:
mode:
authorGravatar Sanjoy Das <sanjoy@google.com>2018-01-24 16:10:12 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-01-24 16:23:00 -0800
commitffa63e57bdd703ae051ae849af5b5a272fca2223 (patch)
treebf95aadf07a2a8c590a238fbc9227d7cdf7f4360 /tensorflow/compiler/xla/service/hlo_execution_profile.cc
parent7bf8ccdb4ef5b0b28c1cf0d5084e07ffbf0e2703 (diff)
[TF:XLA] Replace most of HloProfilePrinter by a protocol buffer
This change replaces the meat of HloProfilePrinter with a protobuf HloProfilePrinterData. The original plan was to serialize HloProfilePrinter into C++ source code and put that in a .cc file along with the string for the xla::ProgramShape. However, since we now directly serialize xla::ProgramShape into a .o file, for consistency I think we should do the same thing for HloProfilePrinter (instead of adding yet another output file to tfcompile). The change itself is fairly simple, it is large mostly due to the mass renaming I had to do. PiperOrigin-RevId: 183158192
Diffstat (limited to 'tensorflow/compiler/xla/service/hlo_execution_profile.cc')
-rw-r--r--tensorflow/compiler/xla/service/hlo_execution_profile.cc118
1 files changed, 55 insertions, 63 deletions
diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
index 849aac0b12..f0df93b61d 100644
--- a/tensorflow/compiler/xla/service/hlo_execution_profile.cc
+++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
@@ -40,83 +40,75 @@ HloProfileIndexMap::HloProfileIndexMap(const HloModule& module) {
}
}
-std::unique_ptr<HloProfilePrinter> CreateHloProfilePrinter(
+std::unique_ptr<HloProfilePrinterData> CreateHloProfilePrinterData(
const HloProfileIndexMap& hlo_profile_index_map,
const HloCostAnalysis& cost_analysis) {
- using HloComputationInfo = HloProfilePrinter::HloComputationInfo;
- using HloInstructionInfo = HloProfilePrinter::HloInstructionInfo;
-
- HloComputationInfo* computation_infos =
- new HloComputationInfo[hlo_profile_index_map.computation_count()];
-
- // There are two "indices" in play here. The first one is the index of the
- // HloComputationInfo or HloInstructionInfo in the array that contains said
- // HloComputationInfo or HloInstructionInfo. The second index is the index of
- // the HloComputationInfo or HloInstructionInfo in the profile counters array,
- // as decided by hlo_profile_index_map. The latter index is always referred
- // to as "profile_index".
-
- size_t computation_index_in_static_data = 0;
- size_t max_profile_index = hlo_profile_index_map.total_count();
- for (const auto& pair : hlo_profile_index_map.computation_to_profile_idx()) {
- CHECK_LT(pair.second, max_profile_index);
+ using HloComputationInfo = HloProfilePrinterData::HloComputationInfo;
+ using HloInstructionInfo = HloProfilePrinterData::HloInstructionInfo;
+
+ size_t profile_counters_size = hlo_profile_index_map.total_count();
+
+ std::unique_ptr<HloProfilePrinterData> profile_printer_data =
+ MakeUnique<HloProfilePrinterData>();
+ profile_printer_data->set_profile_counters_size(profile_counters_size);
+ profile_printer_data->mutable_computation_infos()->Reserve(
+ hlo_profile_index_map.computation_count());
+
+ const auto& computation_to_profile_idx_map =
+ hlo_profile_index_map.computation_to_profile_idx();
+
+ // computation_to_profile_idx_map's order is not deterministic so create a
+ // deterministic computation_and_profile_idx_list so that we end up with a
+ // deterministic HloProfilePrinterData protobuf.
+
+ std::vector<std::pair<const HloComputation*, int64>>
+ computation_and_profile_idx_list(computation_to_profile_idx_map.begin(),
+ computation_to_profile_idx_map.end());
+
+ // The profile indices were computed deterministically in
+ // HloProfileIndexMap::HloProfileIndexMap.
+ c_sort(computation_and_profile_idx_list,
+ [](const std::pair<const HloComputation*, int64>& left,
+ const std::pair<const HloComputation*, int64>& right) {
+ return left.second < right.second;
+ });
+
+ for (const auto& pair : computation_and_profile_idx_list) {
+ CHECK_LT(pair.second, profile_counters_size);
const HloComputation* computation = pair.first;
- size_t current_computation_index = computation_index_in_static_data++;
HloComputationInfo* computation_info =
- &computation_infos[current_computation_index];
+ profile_printer_data->add_computation_infos();
- computation_info->name = strdup(computation->name().c_str());
- computation_info->profile_index = pair.second;
- computation_info->instructions =
- new HloInstructionInfo[computation->instruction_count()];
- computation_info->instructions_size = computation->instruction_count();
+ computation_info->set_name(computation->name());
+ computation_info->set_profile_index(pair.second);
+ computation_info->mutable_instruction_infos()->Reserve(
+ computation->instruction_count());
- size_t instruction_index_in_static_data = 0;
for (const HloInstruction* hlo : computation->instructions()) {
- HloProfilePrinter::HloInstructionInfo* instruction_info =
- &computation_info->instructions[instruction_index_in_static_data++];
- instruction_info->long_name = strdup(hlo->ToString().c_str());
- instruction_info->short_name = strdup(
- hlo->ToString(HloPrintOptions().set_compact_operands(true)).c_str());
- instruction_info->category = strdup(hlo->ToCategory().c_str());
- instruction_info->flop_count = cost_analysis.flop_count(*hlo);
- instruction_info->transcendental_count =
- cost_analysis.transcendental_count(*hlo);
- instruction_info->bytes_accessed = cost_analysis.bytes_accessed(*hlo);
- instruction_info->optimal_seconds = cost_analysis.optimal_seconds(*hlo);
- instruction_info->profile_index =
- hlo_profile_index_map.GetProfileIndexFor(*hlo);
- CHECK_LT(instruction_info->profile_index, max_profile_index);
+ HloInstructionInfo* instruction_info =
+ computation_info->add_instruction_infos();
+ instruction_info->set_long_name(hlo->ToString());
+ instruction_info->set_short_name(
+ hlo->ToString(HloPrintOptions().set_compact_operands(true)));
+ instruction_info->set_category(hlo->ToCategory());
+ instruction_info->set_flop_count(cost_analysis.flop_count(*hlo));
+ instruction_info->set_transcendental_count(
+ cost_analysis.transcendental_count(*hlo));
+ instruction_info->set_bytes_accessed(cost_analysis.bytes_accessed(*hlo));
+ instruction_info->set_optimal_seconds(
+ cost_analysis.optimal_seconds(*hlo));
+ instruction_info->set_profile_index(
+ hlo_profile_index_map.GetProfileIndexFor(*hlo));
}
}
- auto deleter = [](HloProfilePrinter::HloComputationInfo* computation_infos,
- int64 computation_infos_size) {
- for (int64 i = 0; i < computation_infos_size; i++) {
- HloInstructionInfo* instruction_infos = computation_infos[i].instructions;
- for (int64 j = 0; j < computation_infos[i].instructions_size; j++) {
- // We can't make instruction_infos[j].long_name etc. non-const pointers
- // since they may point into static storage, so we have a const_cast
- // here.
- free(const_cast<char*>(instruction_infos[j].long_name));
- free(const_cast<char*>(instruction_infos[j].short_name));
- free(const_cast<char*>(instruction_infos[j].category));
- }
- delete[] instruction_infos;
- free(const_cast<char*>(computation_infos[i].name));
- }
- delete[] computation_infos;
- };
-
- return MakeUnique<HloProfilePrinter>(
- computation_infos, hlo_profile_index_map.computation_count(),
- /*profile_counters_size=*/max_profile_index, deleter);
+ return profile_printer_data;
}
HloExecutionProfile::HloExecutionProfile(
- const HloProfilePrinter* hlo_profile_printer,
+ const HloProfilePrinterData* hlo_profile_printer_data,
const HloProfileIndexMap* hlo_profile_index_map)
- : hlo_profile_printer_(*hlo_profile_printer),
+ : hlo_profile_printer_data_(*hlo_profile_printer_data),
hlo_profile_index_map_(*hlo_profile_index_map),
profile_counters_(
/*count*/ hlo_profile_index_map_.total_count(),